<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMIR</journal-id>
      <journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id>
      <journal-title>Journal of Medical Internet Research</journal-title>
      <issn pub-type="epub">1438-8871</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v27i1e65903</article-id>
      <article-id pub-id-type="pmid">40163858</article-id>
      <article-id pub-id-type="doi">10.2196/65903</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Exploring the Capacity of Large Language Models to Assess the Chronic Pain Experience: Algorithm Development and Validation</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Coristine</surname>
            <given-names>Andrew</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Ehtesham</surname>
            <given-names>Zarmina</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Akindahunsi</surname>
            <given-names>Taiwo</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Chen</surname>
            <given-names>Qingquan</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Frosolini</surname>
            <given-names>Andrea</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author">
          <name name-style="western">
            <surname>Amidei</surname>
            <given-names>Jacopo</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-8846-252X</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Nieto</surname>
            <given-names>Rubén</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <address>
            <institution>eHealth Lab Research Group</institution>
            <institution>Faculty of Psychology and Educational Sciences</institution>
            <institution>Universitat Oberta de Catalunya</institution>
            <addr-line>Rambla del Poblenou, 156</addr-line>
            <addr-line>Barcelona, 08018</addr-line>
            <country>Spain</country>
            <phone>34 933263538</phone>
            <email>rnietol@uoc.edu</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-8759-319X</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Kaltenbrunner</surname>
            <given-names>Andreas</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-2271-3066</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Ferreira De Sá</surname>
            <given-names>Jose Gregorio</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0003-0008-0290</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Serrat</surname>
            <given-names>Mayte</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-5591-9407</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author">
          <name name-style="western">
            <surname>Albajes</surname>
            <given-names>Klara</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff5" ref-type="aff">5</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-4373-3402</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>AI and Data for Society Research Group</institution>
        <institution>Internet Interdisciplinary Institute</institution>
        <institution>Universitat Oberta de Catalunya</institution>
        <addr-line>Barcelona</addr-line>
        <country>Spain</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>eHealth Lab Research Group</institution>
        <institution>Faculty of Psychology and Educational Sciences</institution>
        <institution>Universitat Oberta de Catalunya</institution>
        <addr-line>Barcelona</addr-line>
        <country>Spain</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Unitat d’Expertesa en Síndromes de Sensibilització Central</institution>
        <institution>Servei de Reumatologia</institution>
        <institution>Vall d'Hebron Hospital Universitari</institution>
        <addr-line>Barcelona</addr-line>
        <country>Spain</country>
      </aff>
      <aff id="aff4">
        <label>4</label>
        <institution>Escola Universitària de Fisioteràpia</institution>
        <institution>Escoles Universitàries Gimbernat</institution>
        <addr-line>Barcelona</addr-line>
        <country>Spain</country>
      </aff>
      <aff id="aff5">
        <label>5</label>
        <institution>Psyclinic Mental Health</institution>
        <addr-line>Barcelona</addr-line>
        <country>Spain</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Rubén Nieto <email>rnietol@uoc.edu</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <year>2025</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>31</day>
        <month>3</month>
        <year>2025</year>
      </pub-date>
      <volume>27</volume>
      <elocation-id>e65903</elocation-id>
      <history>
        <date date-type="received">
          <day>29</day>
          <month>8</month>
          <year>2024</year>
        </date>
        <date date-type="rev-request">
          <day>8</day>
          <month>11</month>
          <year>2024</year>
        </date>
        <date date-type="rev-recd">
          <day>4</day>
          <month>12</month>
          <year>2024</year>
        </date>
        <date date-type="accepted">
          <day>17</day>
          <month>1</month>
          <year>2025</year>
        </date>
      </history>
      <copyright-statement>©Jacopo Amidei, Rubén Nieto, Andreas Kaltenbrunner, Jose Gregorio Ferreira De Sá, Mayte Serrat, Klara Albajes. Originally published in the Journal of Medical Internet Research (https://www.jmir.org), 31.03.2025.</copyright-statement>
      <copyright-year>2025</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research (ISSN 1438-8871), is properly cited. The complete bibliographic information, a link to the original publication on https://www.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://www.jmir.org/2025/1/e65903" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Chronic pain, affecting more than 20% of the global population, has an enormous pernicious impact on individuals as well as economic ramifications at both the health and social levels. Accordingly, tools that enhance pain assessment can considerably impact people suffering from pain and society at large. In this context, assessment methods based on individuals’ personal experiences, such as written narratives (WNs), offer relevant insights into understanding pain from a personal perspective. This approach can uncover subjective, intricate, and multifaceted aspects that standardized questionnaires can overlook. However, WNs can be time-consuming for clinicians. Therefore, a tool that uses WNs while reducing the time required for their evaluation could have a significantly beneficial impact on people's pain assessment.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>This study is the first evaluation of the potential of applying large language models (LLMs) to assist clinicians in assessing patients’ pain expressed through WNs.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>We performed an experiment based on 43 WNs made by people with fibromyalgia and qualitatively evaluated in a prior study. Focusing on pain severity and disability, we prompted GPT-4 (with temperature parameter settings 0 or 1) to assign scores and scores’ explanations to these WNs. Then, we quantitatively compared GPT-4 scores with experts’ scores of the same narratives, using statistical measures such as Pearson correlations, root mean squared error, the weighted version of the Gwet agreement coefficient, and Krippendorff α. Additionally, 2 experts specialized in chronic pain conducted a qualitative analysis of the scores’ explanation to assess their accuracy and potential applicability of GPT’s analysis for future pain narrative evaluations.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>Our analysis reveals that GPT-4’s performance in assessing pain narratives yielded promising results. GPT-4 was comparable in terms of agreement with experts (with a weighted percentage agreement higher than 0.95), correlations with standardized measurements (for example, in the range of 0.43 to 0.49 between the Revised Fibromyalgia Impact Questionnaire and GPT-4 with temperature 1), and low error rates (root mean squared error of 1.20 for severity and 1.44 for disability). Moreover, experts generally deemed the ratings provided by GPT-4, as well as the scores’ explanation, to be adequate. However, we observe that GPT has a slight tendency to overestimate pain severity and disability with a lower SD than expert estimates.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>These findings underline the potential of LLMs in facilitating the assessment of WNs of people with fibromyalgia, offering a novel approach to understanding and evaluating patient pain experiences. Integrating automated assessments through LLMs presents opportunities for streamlining and enhancing the assessment process, paving the way for improved patient care and tailored interventions in the chronic pain management field.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>large language models</kwd>
        <kwd>fibromyalgia</kwd>
        <kwd>chronic pain</kwd>
        <kwd>written narratives</kwd>
        <kwd>pain narratives</kwd>
        <kwd>automated assessment</kwd>
        <kwd>pain severity</kwd>
        <kwd>pain disability</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>Chronic pain poses a widespread challenge, affecting more than 20% of the global population [<xref ref-type="bibr" rid="ref1">1</xref>-<xref ref-type="bibr" rid="ref4">4</xref>]. Such kinds of pain are associated with restrictions in daily activities, disrupting normal functionality, and diminishing the overall quality of life. Its repercussions extend to disruptions in familial, professional, and social domains [<xref ref-type="bibr" rid="ref3">3</xref>]. Additionally, persistent pain is often linked to mental health issues, such as depression or anxiety disorders [<xref ref-type="bibr" rid="ref5">5</xref>]. As reported in several studies [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref7">7</xref>], pain is one of the main reasons for medical consultations, and people with chronic pain use a high quantity of health care services (especially those with disabling pain problems). Accordingly, pain significantly contributes to the demand for medical services, imposing a noteworthy economic burden on both individuals experiencing the pain and society at large. Therefore, the economic ramifications at both the health and social levels are substantial. For example, estimates suggest that in Spain alone, the annual total cost, encompassing both direct and indirect expenses, could reach €16 billion (US $17.27 billion) [<xref ref-type="bibr" rid="ref8">8</xref>].</p>
      <p>Tools that enhance assessment to better understand the experiences of people with pain are essential. Only with an adequate assessment is it possible to determine the best intervention approach for each person and to evaluate the effects of interventions. Along these lines, standardized instruments (mainly self-reported measures) and established procedures exist that are available and recommended for routine assessment of people with pain. More specifically, experts recommend taking into account different domains (eg, pain severity, interference with daily activities, emotional functioning), and using some specific self-report measurements [<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref10">10</xref>]. However, self-reported measures are limited to a specific domain and cannot capture the subjectivity of the pain experience. For this reason, assessment procedures based on people’s own experiences have been claimed as valuable to better understand the pain experience from a personal lens. In fact, a qualitative approach can capture subjective, intricate, and multifaceted details that standardized questionnaires can fail to capture [<xref ref-type="bibr" rid="ref11">11</xref>]. In the end, pain is always an internal and subjective experience, providing qualitative reports with richer content to globally understand this subjective experience from the observer’s point of view [<xref ref-type="bibr" rid="ref12">12</xref>].</p>
      <p>In this context, the narrative methodology holds significant potential in understanding the experiences of individuals living with pain [<xref ref-type="bibr" rid="ref13">13</xref>]. This methodology allows researchers and clinicians to glean insights from individuals’ personal stories, allowing them to highlight crucial aspects of their perspectives by using their own words [<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref15">15</xref>]. Examples of prior studies using narrative methodology included the study by Noel et al [<xref ref-type="bibr" rid="ref16">16</xref>] who interviewed parents of young people with chronic pain, using a mixed pain sample to extract and analyze the patient’s narratives. Similarly, Meldrum et al [<xref ref-type="bibr" rid="ref17">17</xref>] conducted a narrative analysis with semistructured interviews with children experiencing chronic pain, both at preintervention and 6-12 months post clinic intake. The use of narrative methodology has extended to written texts as well. McGowan et al [<xref ref-type="bibr" rid="ref18">18</xref>] solicited written narratives (WNs) from women with chronic pelvic pain, while Dysvik et al [<xref ref-type="bibr" rid="ref19">19</xref>] examined WNs from individuals with chronic pain 6 years after completing a pain management intervention. More recently, WNs were used to explore the experience of children with functional abdominal pain and their parents [<xref ref-type="bibr" rid="ref20">20</xref>], adults with neck or back pain [<xref ref-type="bibr" rid="ref21">21</xref>], and people with fibromyalgia [<xref ref-type="bibr" rid="ref22">22</xref>]. In a related manner, Kathan et al [<xref ref-type="bibr" rid="ref23">23</xref>] used a qualitative survey in which people were asked to respond to questions about pain acceptability by using their own words.</p>
      <p>The growing interest in WNs is mainly due to the fact that asking people suffering from a condition for written content offers distinct advantages over oral inquiries, primarily because writing facilitates the organization of ideas related to complex emotional experiences such as pain [<xref ref-type="bibr" rid="ref24">24</xref>]. Additionally, WNs offer a time-efficient way to explore the subjective nature of pain, as individuals can complete the writing outside of health care consultations, making it an accessible initial approach to this complex phenomenon [<xref ref-type="bibr" rid="ref20">20</xref>]. On the other hand, analyzing the content expressed by people with pain can be time-consuming for clinicians and researchers. For this reason, this study explores the feasibility of using artificial intelligence (AI) and in particular large language models (LLMs) to assess WNs of people with chronic pain.</p>
      <p>AI has been used in the assessment of the pain field in some studies, as presented in the recent review by Abd-Elsayedin et al [<xref ref-type="bibr" rid="ref25">25</xref>]. In this review, studies were found to address the following purposes: (1) diagnostic aid, (2) modeling pain progression, (3) predicting pain treatment response, and (4) improving treatment and pain maintenance. To achieve these, most of the studies used machine learning techniques. Although none of the reviewed studies used LLMs, there are a few works available using them for pain research. Vaid et al [<xref ref-type="bibr" rid="ref26">26</xref>] used locally running, privacy-preserving LLMs capable of following plain language instructions to extract characteristics of musculoskeletal pain (such as location and acuity) from a heterogeneous collection of unstructured clinical notes. The study used multiple patient notes, coded by 2 health care professionals, and found high precision of the system in classifying pain location and acuity. In another study, Shrestha et al [<xref ref-type="bibr" rid="ref27">27</xref>] tested the responses of GPT to clinical questions and recommendations based on an established clinical guideline. They found that the system was able to make clinical recommendations for low back pain, although it was not exempt from errors. In the same line, Gianola et al [<xref ref-type="bibr" rid="ref28">28</xref>] tested the ability of GPT against clinical practice guidelines to answer clinical questions about lumbosacral radicular pain. They found negative results since the internal consistency was low, as well as the precision to follow the clinical guidelines for recommendations.</p>
      <p>Using LLMs for the assessment of WNs provides several advantages, such as decreasing assessment time for clinicians—where LLMs serve as clinician assistants—and allowing language flexibility—meaning our methodology can be applied to various languages. This study can contribute to the open debate about the potential application of LLMs for health. While there is existing literature supporting benefits such as the capabilities for analyzing massive data, there are also studies showing disadvantages such as inaccuracies with the use of LLMs [<xref ref-type="bibr" rid="ref29">29</xref>].</p>
      <p>To evaluate LLMs’ capacity to assess pain narratives we used the narratives provided in Serrat et al [<xref ref-type="bibr" rid="ref22">22</xref>]. In our investigation, GPT-4 was used to assign scores, as well as the scores’ explanation, for pain severity and disability as expressed in the narratives. Subsequently, we conducted a quantitative analysis by comparing these scores with expert ratings reported by Serrat et al [<xref ref-type="bibr" rid="ref22">22</xref>], using statistical measures such as Pearson correlations, root-mean-squared error (RMSE), the weighted version of Gwet agreement coefficient (Gwet AC2) [<xref ref-type="bibr" rid="ref30">30</xref>], and Krippendorff α [<xref ref-type="bibr" rid="ref31">31</xref>]. Additionally, a qualitative analysis of the scores’ explanation was performed by consulting 2 experts specialized in chronic pain, who evaluated the accuracy of GPT's pain assessment and its potential use for future pain WNs assessments. Altogether, the primary contribution of this paper is its pioneering exploration of applying LLMs for WN assessment, validated through both automatic and human evaluation methods. To the best of our knowledge, this paper is the first attempt at using LLMs to assess pain WNs.</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Procedure</title>
        <p>This study uses participant data from a previous qualitative study [<xref ref-type="bibr" rid="ref22">22</xref>], which explored the value of WNs for understanding the experience of people with fibromyalgia. Specifically, we used patient WNs, their assessments by 2 experts, and patient answers on standardized questionnaires to reanalyze the data using GPT-4 and compare its outcomes with the previous results. We have chosen GPT-4 as it is widely considered the state-of-the-art proprietary LLM model, which has been successfully applied in several medical contexts. See, for example, a recent study by Goh et al [<xref ref-type="bibr" rid="ref32">32</xref>] on the use of GPT for diagnostic reasoning.</p>
      </sec>
      <sec>
        <title>Dataset</title>
        <p>A total of 46 people completed the WNs task in Serrat et al [<xref ref-type="bibr" rid="ref22">22</xref>]. The inclusion criteria for these participants were (1) fulfillment of the 2010/2011 American College of Rheumatology Fibromyalgia diagnostic criteria [<xref ref-type="bibr" rid="ref33">33</xref>] and (2) age of 18 years or older. The exclusion criteria were having terminal illnesses or programmed interventions that might interrupt the study.</p>
        <p>In this study, we selected data from 43 participants, that is, the ones written in Spanish (3 participants who did the task in a different language were excluded). They were requested to write about their pain experiences with the objective of capturing their personal viewpoints. A sheet was provided to participants informing them about that, together with the following cues/points to compose the narrative.</p>
        <list list-type="bullet">
          <list-item>
            <p>Describe your pain now and how you manage it.</p>
          </list-item>
          <list-item>
            <p>Describe your pain over time: how it began, if it has changed or stayed the same, and what has affected changes over time.</p>
          </list-item>
          <list-item>
            <p>Describe your feelings and how pain has made you feel (in the family, work, and social contexts) over time.</p>
          </list-item>
          <list-item>
            <p>Describe how pain has affected your daily life over time.</p>
          </list-item>
          <list-item>
            <p>Describe whether pain has affected the way you see your future and the things you would like to do.</p>
          </list-item>
          <list-item>
            <p>Describe the treatments you have followed and what effect(s) they have had.</p>
          </list-item>
        </list>
        <p>It was explained to participants that these cues were just tentative, and they could choose what to explain. They were given the option to complete the task in the language most comfortable and convenient for them and to write by hand (in this case WNs were transcribed for the analyses) or by using digital methods [<xref ref-type="bibr" rid="ref22">22</xref>]. Participants were also asked to complete the following questionnaires.</p>
        <list list-type="bullet">
          <list-item>
            <p>Revised Fibromyalgia Impact Questionnaire (FIQR): A 20-item questionnaire that measures functional impairment over the last 7 days. It has 3 dimensions: physical dysfunction (scores from 0 to 30), overall impact (scores from 0 to 20), and intensity of symptoms (scores from 0 to 50). The total sum of these scores ranges from 0 to 100, and higher scores indicate a greater impact. The Spanish version shows adequate internal consistency (Cronbach α=0.93) [<xref ref-type="bibr" rid="ref34">34</xref>].</p>
          </list-item>
          <list-item>
            <p>Hospital Anxiety and Depression Scale (HADS): A commonly used questionnaire that evaluates the severity of anxiety and depression symptoms by 2 scales (each consisting of 7 items). Scores on each scale range from 0 to 21, with higher scores indicating greater severity of symptoms. The Spanish version has shown adequate internal consistency both for anxiety (Cronbach α=0.83) and depression (Cronbach α=0.87) subscales in individuals with fibromyalgia [<xref ref-type="bibr" rid="ref35">35</xref>].</p>
          </list-item>
          <list-item>
            <p>Tampa Scale for Kinesiophobia: A scale composed of 11 items, to be answered on a 4-point Likert scale. The scale quantifies fear of movement, injury, or reinjury. Its total scores can range from 11 to 44, where higher scores indicate a greater fear of pain and movement. The Spanish version shows adequate internal consistency (Cronbach α=0.79) [<xref ref-type="bibr" rid="ref36">36</xref>].</p>
          </list-item>
        </list>
        <p>In Serrat et al [<xref ref-type="bibr" rid="ref22">22</xref>], 2 independent reviewers (with expertise in pain, 1 psychologist and 1 physiotherapist) assessed the level of severity and disability expressed in the WNs on a scale from 0=indicating absence to 10=representing maximum levels. To ensure consistency in their evaluations, severity was defined as “the perceived magnitude of fibromyalgia concerning pain and overall suffering conveyed in each participant text.” Disability was defined as “the perceived extent to which fibromyalgia disrupts the usual activities and life of the writers.”</p>
      </sec>
      <sec>
        <title>Ethical Considerations</title>
        <p>Our analysis uses anonymized data from the study by Serrat et al [<xref ref-type="bibr" rid="ref22">22</xref>]. They received ethical approval for performing their study from the ethics committee of the Vall d’Hebron Hospital, Barcelona (reference: PR[AG]99/2022). Participants were asked first to complete the FIQR, HADS, and Tampa Scale for Kinesiophobia self-reported measurements and then to write a WN to describe their pain experience. Participants in the Serrat et al [<xref ref-type="bibr" rid="ref22">22</xref>] study were asked for their informed consent, according to which the collected data could be used only for research purposes. No compensation was provided to the participants in the study by Serrat et al [<xref ref-type="bibr" rid="ref22">22</xref>] or for the human evaluation performed in this study.</p>
      </sec>
      <sec>
        <title>Experiments With GPT</title>
        <p>We tasked GPT-4 to evaluate the WNs one by one. Specifically, we prompted GPT-4 to provide a score for pain severity and disability, as well as an explanation for the scores. Both scores were requested on a scale from 0 to 10 to compare with human assessments performed by Serrat et al [<xref ref-type="bibr" rid="ref22">22</xref>]. Pain severity and disability are among the main outcome variables assessed in the pain field, and 11-point scales are, as recommended by experts [<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref10">10</xref>], used frequently in the field.</p>
        <p>To test the stability of these responses, we repeat the experiment with GPT-4 10 times. Both the scores of the 10 trials and the explanations (randomly chosen from one of the trials) were then used for the evaluation phase. The specific prompting strategy used is displayed in <xref ref-type="boxed-text" rid="box1">Textbox 1</xref> and was performed with automated calls to the OpenAI API with model gpt-4-0125-preview.</p>
        <p>The prompt includes the following instructions.</p>
        <list list-type="bullet">
          <list-item>
            <p>Analyze patient narratives written in Spanish.</p>
          </list-item>
          <list-item>
            <p>Score each narrative on a scale from 0 to 10 for both severity and disability.</p>
          </list-item>
          <list-item>
            <p>Provide explanations for each score in English.</p>
          </list-item>
          <list-item>
            <p>Pay attention to factors that might reduce perceived severity or disability, such as coping mechanisms or support systems.</p>
          </list-item>
        </list>
        <p>We performed experiments between April 22, 2024, and April 24, 2024, with 2 different values for GPT’s temperature parameter. This parameter allows controlling the randomness in the answers of the LLMs. We use 0=less randomness and 1=average randomness; the latter is the default value of GPT-4.</p>
        <boxed-text id="box1" position="float">
          <title>GPT-4 prompts used for the experiment.</title>
          <p>As an expert psychologist specializing in evaluating pain in patients diagnosed with fibromyalgia, you are tasked with analyzing patient narratives about their pain and then scoring them on a scale from 0 (indicating no severity or disability) to 10 (indicating maximum severity and disability). These patients’ explanations about their pain and how they feel it are all written in Spanish. The level of severity is defined as the perceived intensity of pain and overall suffering. Disability is defined as the extent to which fibromyalgia hinders patients’ usual activities and quality of life and is to be rated based on your interpretations of the patients’ texts. Scores should accurately reflect the severity and disability levels described in patient narratives without inflation. A holistic evaluation capturing the complexity of experiences is crucial. Pay attention to phrases indicating coping mechanisms, resilience, or mitigating factors that may reduce perceived severity or disability. Consider contextual understanding, including coping strategies, support systems, and adaptive behaviors, which may mitigate perceived severity and disability. Your role involves receiving a patient’s narrative, enclosed within triple slashes, and analyzing it. You are expected to return your analysis in JSON format, with the following keys: “severity_score” providing the scores for severity ranging from 0 to 10, “disability_score” providing the scores for disability ranging from 0 to 10, “severity_explanation” providing an in English explanation for the severity score and “disability_explanation” providing an in English explanation for the disability score.</p>
        </boxed-text>
      </sec>
      <sec>
        <title>Expert Evaluation</title>
        <p>We asked two pain management and assessment experts to analyze the scores given by GPT-4 and the corresponding textual explanations by using Qualtrics. Although they form part of the list of authors of this paper, they were only aware of the general objective of the study and the instructions provided for their task. After doing their task, they were given access to all the details of the study and the manuscript. They performed the task between April 26, 2024, and May 3, 2024. Specifically, we randomly chose 1 trial out of the 10 performed with GPT-4 temperature 1. Then, we asked the 2 experts first to read the original narratives, the scores, and their explanations given by GPT-4 (<xref ref-type="boxed-text" rid="box2">Textbox 2</xref>) for pain severity and disability. Second, we asked them to assess on a 7-point scale (from strongly disagree to strongly agree) to what extent the explanation: (1) could have been written by a psychologist expert in fibromyalgia, (2) adequately represents the scores for severity or disability, and (3) they would use the score and explanation provided by GPT-4 for patient assessment. An example of the evaluation task given to the experts can be seen in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>. Each expert was blinded to the assessment made by the other expert.</p>
        <boxed-text id="box2" position="float">
          <title>Example of GPT-4 (temperature 1) explanation for a pain severity score of 4.</title>
          <p>Originally, the patient experienced high levels of pain rated between 8 and 9, indicating a severe impact on their life due to fibromyalgia. However, through their own research and changes in lifestyle and treatment approaches, they have managed to reduce the frequency and intensity of their pain flare-ups to 1 or 2 days and the pain level to between 3 and 4. This significant improvement suggests a reduction in the severity of their condition, despite the initial diagnosis and challenges. Their proactive approach to managing their condition, including dietary changes and avoiding certain environmental factors, has effectively reduced the severity of their symptoms.</p>
        </boxed-text>
      </sec>
      <sec>
        <title>Evaluation</title>
        <p>We performed a 5-stage analysis. First, we used SD analysis to test the stability of assessments given in 10 trials by GPT-4 for pain severity and disability. Second, we compared GPT-4 scores (for pain disability and pain severity) with experts’ scores (from Serrat et al [<xref ref-type="bibr" rid="ref22">22</xref>]) and a naive baseline predictor (which always predicts the average experts’ score) using three strategies: (1) interannotator agreement (IAA) to quantify the agreement between GPT-4 and the experts; (2) RMSE to measure the average squared differences among GPT-4, expert scores, and the naive baseline; and (3) mean absolute error (MAE) to determine if GPT-4 systematically overestimates or underestimates the expert scores (ie, if GPT-4 exhibits bias). IAA was assessed using 4 coefficients to ensure data reliability: percent agreement, weighted percent agreement (a weighted version of percent agreement that takes into account the ordinal nature of the data), Krippendorff α [<xref ref-type="bibr" rid="ref31">31</xref>], and Gwet AC2 coefficient [<xref ref-type="bibr" rid="ref30">30</xref>].</p>
        <p>Third, we computed descriptive statistics (in IBM SPSS) for the experts’ evaluation of the GPT-4 assessments, as well as IAA (in this case, Krippendorff α was not reported because of the significant imbalance in scores, particularly toward the higher end). Fourth, we tested correlations between GPT-4 assessments, expert assessments, and standardized pain measurements [<xref ref-type="bibr" rid="ref22">22</xref>]. Fifth, we used statistical tests, in particular, 2-tailed <italic>t</italic> tests (in SPSS) and <italic>z</italic> scores (implemented in Python, Python Software Foundation) to test for significant differences between some of our results.</p>
        <p>Regarding stage 2 strategy 1, we chose to use four agreement coefficients to ensure more robust results when reporting the IAA. While percentage agreement gives an indication of raw agreement among annotators, weighted percentage agreement provides insight into the degree of difference between scores when annotators disagree. On the other hand, Krippendorff α accounts for the possibility that disagreement among annotators may occur by chance. Gwet AC2 further refines this by limiting the pool of items that would lead to an agreement by chance. As a result, Gwet AC2 reports more accurate agreement in the case of imbalanced annotation, that is, those annotations where some scores are rarely or never used. Our experiments show an imbalance in ratings towards higher categories (6-10), making the use of Gwet AC2 coefficient more suitable due to the possibility of the interannotator prevalence paradox [<xref ref-type="bibr" rid="ref37">37</xref>]. The metrics used in strategy 1, as well as the metrics used in the other strategies of stage 2, have been calculated using Python. In particular, the <italic>scikit-learn</italic> package was used for error measurement, and the irrCAC library [<xref ref-type="bibr" rid="ref38">38</xref>] to measure the coefficients of agreement. The weighted percent agreement, Krippendorff α, and Gwet AC2 coefficient were measured with ordinal weight. For more details on how the ordinal weights are computed, we refer to the book <italic>Handbook of Inter-Rater Reliability: The Definitive Guide to Measuring the Extent of Agreement Among Raters, 4th Edition</italic> [<xref ref-type="bibr" rid="ref30">30</xref>].</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>GPT-4 Assessment Stability</title>
        <p>The mean and SD for pain severity and disability were assessed for 10 trials at 2 different GPT-4 temperatures (0 and 1). At temperature 0, the mean severity score was 8.13 (SD 1.09) and the mean disability score was 7.25 (SD 1.28). At temperature 1, the mean severity score was 8.08 (SD 1.02) and the mean disability score was 7.33 (SD 1.32). These results suggest stability in both pain severity and disability across the different 10 trials and temperatures.</p>
        <p>If we compare them with the average score of 2 human experts from Serrat et al [<xref ref-type="bibr" rid="ref22">22</xref>], we observe that GPT has a slight tendency to overestimate the pain level and a lower SD. The mean for severity was 7.42 (SD 1.38) for expert 1 and 7.30 (SD 1.72) for expert 2, while the mean for disability was 7.00 (SD 2.12) for expert 1 and 6.95 (SD 2.05) for expert 2. This difference also becomes visible when comparing the distribution of the individual scores as depicted in <xref rid="figure1" ref-type="fig">Figure 1</xref> for severity and <xref rid="figure2" ref-type="fig">Figure 2</xref> for disability. GPT evaluations show a more pronounced mode and are less frequent in scores lower than or equal to 5. GPT is also reluctant to assign the highest score of 10.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Distribution of the severity scores. Human experts versus GPT with 2 different temperature settings.</p>
          </caption>
          <graphic xlink:href="jmir_v27i1e65903_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Distribution of the disability scores. Human experts versus GPT with 2 different temperature settings.</p>
          </caption>
          <graphic xlink:href="jmir_v27i1e65903_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Agreement in Evaluations of Pain Severity and Disability Made by GPT and Experts</title>
        <p>Results related to the IAA are presented in <xref ref-type="table" rid="table1">Table 1</xref>. IAA between experts is acceptable (both for pain severity and disability) considering Krippendorff α and Gwet AC2 coefficients. Delving deeper into the IAA analysis, the low percentage agreement (0.29 for pain severity and 0.31 for disability), combined with a high weighted percentage agreement (0.96 for pain severity and 0.95 for disability), suggests that while experts rarely chose exactly the same score, their disagreements were typically within adjacent scores. Given the subjectivity inherent in assessing WNs, which is influenced by numerous factors, the IAA demonstrates a satisfactory agreement among experts.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Mean (SD) of the agreement between experts and GPT-4. Values for GPT-4 are averages (SDs) over 10 experiments.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="210"/>
            <col width="0"/>
            <col width="80"/>
            <col width="0"/>
            <col width="170"/>
            <col width="0"/>
            <col width="170"/>
            <col width="170"/>
            <col width="170"/>
            <thead>
              <tr valign="top">
                <td colspan="3">
                  <break/>
                </td>
                <td colspan="5">Expert 1 versus</td>
                <td colspan="2">Expert 2 versus</td>
              </tr>
              <tr valign="top">
                <td colspan="3">
                  <break/>
                </td>
                <td colspan="2">Expert 2</td>
                <td colspan="2">GPT-4 (temperature 0) (mean, SD)</td>
                <td>GPT-4 (temperature 1) (mean, SD)</td>
                <td>GPT-4 (temperature 0) (mean, SD)</td>
                <td>GPT-4 (temperature 1) (mean, SD)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="10">
                  <bold>Pain severity</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Percent agreement</td>
                <td colspan="2">0.29</td>
                <td colspan="2">0.36 (0.01)</td>
                <td colspan="2">0.36 (0.04)</td>
                <td>0.27 (0.01)</td>
                <td>0.32 (0.03)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Weighted percent agreement</td>
                <td colspan="2">0.96</td>
                <td colspan="2">0.94 (&#60;0.01)</td>
                <td colspan="2">0.94 (0.01)</td>
                <td>0.95 (&#60;0.01)</td>
                <td>0.95 (&#60;0.01)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Gwet AC2</td>
                <td colspan="2">0.87</td>
                <td colspan="2">0.83 (&#60;0.01)</td>
                <td colspan="2">0.84 (0.02)</td>
                <td>0.84 (&#60;0.01)</td>
                <td>0.84 (0.01)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Krippendorff α</td>
                <td colspan="2">0.66</td>
                <td colspan="2">0.46 (0.01)</td>
                <td colspan="2">0.45 (0.05)</td>
                <td>0.51 (0.01)</td>
                <td>0.49 (0.04)</td>
              </tr>
              <tr valign="top">
                <td colspan="10">
                  <bold>Disability</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Percent agreement</td>
                <td colspan="2">0.31</td>
                <td colspan="2">0.21 (0.02)</td>
                <td colspan="2">0.23 (0.04)</td>
                <td>0.33 (0.02)</td>
                <td>0.35 (0.08)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Weighted percent agreement</td>
                <td colspan="2">0.95</td>
                <td colspan="2">0.94 (&#60;0.01)</td>
                <td colspan="2">0.94 (0.0)</td>
                <td>0.94 (&#60;0.01)</td>
                <td>0.94 (&#60;0.01)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Gwet AC2<sup>a</sup></td>
                <td colspan="2">0.83</td>
                <td colspan="2">0.79 (&#60;0.01)</td>
                <td colspan="2">0.79 (0.01)</td>
                <td>0.80 (&#60;0.01)</td>
                <td>0.80 (0.02)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Krippendorff α</td>
                <td colspan="2">0.69</td>
                <td colspan="2">0.47 (0.01)</td>
                <td colspan="2">0.49 (0.03)</td>
                <td>0.57 (0.01)</td>
                <td>0.57 (0.04)</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>Gwet AC2: the weighted version of Gwet agreement coefficient.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <p>When comparing GPT-4 scores with expert scores, Krippendorff α indicated slightly lower agreement compared to the agreement between the experts. However, percent agreement, weighted percent agreement, and Gwet AC2 are comparable to the agreement between experts, with the notable exception that, for pain severity (temperature 1), the percent agreement was higher than the one reached by the experts.</p>
        <p>We compare the scores of pain severity and disability obtained with GPT-4 to those of the experts and the naive baseline using RMSE and MAE (by definition, MAE is 0 for the naive baseline).</p>
        <p>GPT-4 can approximate the average of the expert ratings with RMSEs of around 1.20 for severity and 1.44 for disability (<xref ref-type="table" rid="table2">Table 2</xref>). These values are significantly lower than the ones obtained with the naive baselines with <italic>P</italic>&#60;.001 for GPT-4 with temperature 0 and <italic>P</italic>&#60;.01 for temperature 1. Furthermore, we can observe that the RMSEs between the 2 experts are very close to the ones obtained by GPT-4 when compared to the average of the 2 experts for disability and even smaller for pain severity. Overall, these errors are acceptable, especially when compared to the differences between the 2 experts.</p>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Predicting severity and disability scores with GPT with different temperature values. RMSE<sup>a</sup> and MAE<sup>b</sup> of GPT-4 compared to 2 expert evaluations and a naive baseline<sup>c</sup>.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="80"/>
            <col width="60"/>
            <col width="90"/>
            <col width="100"/>
            <col width="80"/>
            <col width="0"/>
            <col width="100"/>
            <col width="100"/>
            <col width="80"/>
            <col width="0"/>
            <col width="100"/>
            <col width="100"/>
            <col width="80"/>
            <thead>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Statistical measures</td>
                <td colspan="5">Expert 1 versus</td>
                <td colspan="4">Expert 2 versus</td>
                <td colspan="3">Average of experts versus</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>Expert 2</td>
                <td>GPT-4 (temperature 0)</td>
                <td>GPT-4 (temperature 1)</td>
                <td>Baseline</td>
                <td colspan="2">GPT-4 (temperature 0)</td>
                <td>GPT-4 (temperature 1)</td>
                <td>Baseline</td>
                <td colspan="2">GPT-4 (temperature 0)</td>
                <td>GPT-4 (temperature 1)</td>
                <td>Baseline</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="14">
                  <bold>Pain severity</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>RMSE</td>
                <td>1.15</td>
                <td>1.27<sup>d</sup></td>
                <td>1.25<sup>e</sup></td>
                <td>1.38</td>
                <td colspan="2">1.39<sup>d</sup></td>
                <td>1.40<sup>f</sup></td>
                <td>1.72</td>
                <td colspan="2">1.20<sup>d</sup></td>
                <td>1.19<sup>f</sup></td>
                <td>1.45</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>MAE</td>
                <td>0.12</td>
                <td>–0.71</td>
                <td>–0.66</td>
                <td>0.00</td>
                <td colspan="2">–0.83</td>
                <td>–0.78</td>
                <td>0.00</td>
                <td colspan="2">–0.77</td>
                <td>–0.72</td>
                <td>0.00</td>
              </tr>
              <tr valign="top">
                <td colspan="14">
                  <bold>Disability</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>RMSE</td>
                <td>1.56</td>
                <td>1.75<sup>d</sup></td>
                <td>1.74<sup>f</sup></td>
                <td>2.20</td>
                <td colspan="2">1.50<sup>d</sup></td>
                <td>1.53<sup>f</sup></td>
                <td>2.05</td>
                <td colspan="2">1.44<sup>d</sup></td>
                <td>1.44<sup>f</sup></td>
                <td>1.98</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>MAE</td>
                <td>0.05</td>
                <td>–0.25</td>
                <td>–0.33</td>
                <td>0.00</td>
                <td colspan="2">–0.30</td>
                <td>–0.37</td>
                <td>0.00</td>
                <td colspan="2">–0.27</td>
                <td>–0.35</td>
                <td>0.00</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table2fn1">
              <p><sup>a</sup>RMSE: root-mean-square error.</p>
            </fn>
            <fn id="table2fn2">
              <p><sup>b</sup>MAE: mean absolute error.</p>
            </fn>
            <fn id="table2fn3">
              <p><sup>c</sup>A significant difference between the root mean square errors of GPT-4 and the baseline (2-sided <italic>P</italic> value equivalent of the <italic>z</italic> score of 10 experiments with GPT-4 and the baseline).</p>
            </fn>
            <fn id="table2fn4">
              <p><sup>d</sup><italic>P</italic>&#60;.001.</p>
            </fn>
            <fn id="table2fn5">
              <p><sup>e</sup><italic>P</italic>&#60;.10.</p>
            </fn>
            <fn id="table2fn6">
              <p><sup>f</sup><italic>P</italic>&#60;.01.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <p>We also analyzed the MAEs (ie, expert score minus GPT-4 score), which indicate a potential tendency to underrate or overrate the expert scores. For both temperatures, we find that GPT-4 on average slightly overestimates the individual experts’ scores. More precisely, the overestimation ranges between 0.66 (temperature 1) and 0.83 (temperature 0) for severity and between 0.25 (temperature 0) and 0.37 (temperature 1) for disability.</p>
        <p>Finally, all the results commented on are quite similar when comparing temperatures 0 and 1. The comparison between GPT-4 scores and those provided by experts highlights a significant alignment in their assessments of WNs. This finding holds promise, especially given the inherent subjectivity involved in evaluating WNs. To delve deeper into this alignment, we enlisted 2 pain assessment experts to evaluate the GPT-4 scores and their accompanying descriptions.</p>
      </sec>
      <sec>
        <title>Expert Evaluation</title>
        <p>The IAA among experts is acceptable, as shown in <xref ref-type="table" rid="table3">Table 3</xref>. The low percent agreement is compensated by a notably high weighted percent agreement (except for pain severity in question 1). This suggests that, although experts rarely assign the same score, they tend to choose adjacent ratings when a disagreement arises. This phenomenon indicating acceptable IAA is also reflected in Gwet AC2 values (except for disability in question 2).</p>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Agreement between experts on the 3 questions. Ordinal weights were then applied to these categories based on their positions on the scale [<xref ref-type="bibr" rid="ref30">30</xref>]<sup>a,b</sup>.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="520"/>
            <col width="150"/>
            <col width="150"/>
            <col width="150"/>
            <thead>
              <tr valign="top">
                <td colspan="2">
                  <break/>
                </td>
                <td>Question 1</td>
                <td>Question 2</td>
                <td>Question 3</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="5">
                  <bold>Pain severity</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Percent agreement</td>
                <td>0.08</td>
                <td>0.21</td>
                <td>0.12</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Weighted percent agreement</td>
                <td>0.50</td>
                <td>0.95</td>
                <td>0.91</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Gwet AC2</td>
                <td>0.93</td>
                <td>0.83</td>
                <td>0.66</td>
              </tr>
              <tr valign="top">
                <td colspan="5">
                  <bold>Disability</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Percent agreement</td>
                <td>0.17</td>
                <td>0.21</td>
                <td>0.33</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Weighted percent agreement</td>
                <td>0.95</td>
                <td>0.96</td>
                <td>0.94</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Gwet AC2</td>
                <td>0.81</td>
                <td>0.43</td>
                <td>0.72</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table3fn1">
              <p><sup>a</sup>The coefficients were calculated by assigning numerical values to the categories: 1=strongly disagree, 2=disagree, 3=somewhat disagree, 4=neither agree nor disagree, 5=somewhat agree, 6=agree, and 7=strongly agree.</p>
            </fn>
            <fn id="table3fn2">
              <p><sup>b</sup>This method acknowledges that experts may differ more significantly in their disagreement if one selects “somewhat disagree” while the other selects “agree” compared to if one chooses “agree” while the other chooses “strongly agree.”</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <p>As shown in <xref ref-type="table" rid="table4">Table 4</xref>, a 2-tailed <italic>t</italic> test analysis indicates that expert 2’s assessments were significantly higher (<italic>P</italic>&#60;.001) for all 3 questions compared to those of expert 1. This phenomenon implies a divergence in experts’ scoring interpretations. Despite this difference, both experts appear to adhere consistently to their respective scoring criteria during the annotation process. This finding suggests that while there may be slight individual variations in scoring approaches between experts, they maintain internal consistency in their assessments.</p>
        <table-wrap position="float" id="table4">
          <label>Table 4</label>
          <caption>
            <p>Expert evaluation scores: mean (SD), max score is 7. Significant differences between the 2 experts are determined by a 2-sided <italic>t</italic> test<sup>a</sup>.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="520"/>
            <col width="150"/>
            <col width="150"/>
            <col width="150"/>
            <thead>
              <tr valign="top">
                <td colspan="2">
                  <break/>
                </td>
                <td>Expert 1, mean (SD)</td>
                <td>Expert 2, mean (SD)</td>
                <td><italic>t</italic> test (<italic>df</italic>)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="5">
                  <bold>Question 1<sup>b</sup></bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Pain severity</td>
                <td>5.72 (0.45)</td>
                <td>6.88 (0.32)</td>
                <td>–13.67 (84)<sup>c</sup></td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Disability</td>
                <td>5.93 (0.26)</td>
                <td>6.83 (0.37)</td>
                <td>–13.10 (84)<sup>c</sup></td>
              </tr>
              <tr valign="top">
                <td colspan="5">
                  <bold>Question 2<sup>d</sup></bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Pain severity</td>
                <td>5.93 (0.26)</td>
                <td>6.79 (0.41)</td>
                <td>–11.62 (84)<sup>c</sup></td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Disability</td>
                <td>6 (0)</td>
                <td>6.79 (0.41)</td>
                <td>–12.60 (84)<sup>c</sup></td>
              </tr>
              <tr valign="top">
                <td colspan="5">
                  <bold>Question 3<sup>e</sup></bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Pain severity</td>
                <td>5.44 (0.63)</td>
                <td>6.72 (0.45)</td>
                <td>–10.82 (84)<sup>c</sup></td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Disability</td>
                <td>5.77 (0.43)</td>
                <td>6.65 (0.48)</td>
                <td>–8.99 (84)<sup>c</sup></td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table4fn1">
              <p><sup>a</sup>The coefficients were calculated by assigning numerical values to the categories: 1=strongly disagree, 2=disagree, 3=somewhat disagree, 4=neither agree nor disagree, 5=somewhat agree, 6=agree, and 7=strongly agree.</p>
            </fn>
            <fn id="table4fn2">
              <p><sup>b</sup>Question 1: The explanation could have been written by a psychologist expert in fibromyalgia.</p>
            </fn>
            <fn id="table4fn3">
              <p><sup>c</sup><italic>P</italic>&#60;.001.</p>
            </fn>
            <fn id="table4fn4">
              <p><sup>d</sup>Question 2: The explanation adequately represents the pain severity expressed in the narrative.</p>
            </fn>
            <fn id="table4fn5">
              <p><sup>e</sup>Question 3: I would use the pain severity score and explanation above to help myself assess the patient's pain.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <p>The mean assessment scores for both pain severity and disability are presented in <xref ref-type="table" rid="table4">Table 4</xref>. Assessments for the 3 questions were high (scores ranging from 5=somewhat agree and 6=agree for expert 1 and 6=agree and 7=strongly agree for expert 2) and with low variability (SD, ranging from 0 to 0.63). In other words, the experts agreed to consider GPT-4 assessments accurate and usable as clinician assistants.</p>
        <p>In conclusion, the disagreement observed between the experts appears to stem from differing interpretations of scoring criteria, as evidenced by the consistent trend of 1 expert assigning higher scores than the other expert during disagreements, displaying a more optimistic outlook. This indicates a personal bias in their evaluation approaches. The identification of such a clear pattern in the evaluation process is particularly intriguing as it suggests a consistent trend in how assessments were conducted and interpreted by the experts. On the one hand, further analysis of this pattern could provide valuable insights into the underlying factors influencing the evaluation outcomes and shed light on the reliability and consistency of the assessment process. Understanding and leveraging such patterns can enhance the effectiveness and accuracy of automated systems like LLMs in pain narrative assessments. On the other hand, understanding this pattern in expert evaluations can provide valuable insights into the subjective nature of pain assessments and the potential impact on IAA in pain narrative evaluations.</p>
        <p>Despite this pattern observed in the evaluation, both experts concur on the use of GPT-4 as a valuable tool for pain assessment tasks. This alignment in their assessment of GPT-4's effectiveness underscores its potential to complement and enhance traditional expert evaluations in pain narrative analysis.</p>
      </sec>
      <sec>
        <title>Correlations of Human and GPT-4 Assessments With Standardized Measurements</title>
        <p>The mean of the ratings assigned by experts for severity and disability [<xref ref-type="bibr" rid="ref22">22</xref>] significantly correlated with scores from the FIQR questionnaire, and the anxiety and depression scores from the HADS questionnaire. When analyzing the corresponding correlations with the scores given by GPT-4, for temperature 0, results were very similar to the ones found for experts with the exception that in this case there were no significant correlations between pain severity and disability and anxiety scores. However, the correlations between scores assigned by GPT-4 with temperature 1 and depression and anxiety were in line with the ones found for the experts (<xref ref-type="table" rid="table5">Table 5</xref>). Also, in this case, the results indicate that GPT-4’s performance in assessing pain WNs closely approximates humans’ assessment. In addition, a further encouraging outcome is found: GPT-4’s pain WNs assessment shows a favorable alignment with standard pain assessment tests (for example, see GPT-4 with temperature 1 in the assessments of FIQR and HADS depression in <xref ref-type="table" rid="table5">Table 5</xref>).</p>
        <table-wrap position="float" id="table5">
          <label>Table 5</label>
          <caption>
            <p>Pearson correlations of expert and GPT-4 assessments with standardized measurements<sup>a</sup>.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="370"/>
            <col width="150"/>
            <col width="150"/>
            <col width="150"/>
            <col width="150"/>
            <thead>
              <tr valign="top">
                <td colspan="2">
                  <break/>
                </td>
                <td>FIQR<sup>b</sup></td>
                <td>TSK<sup>c</sup></td>
                <td colspan="2">HADS<sup>d</sup></td>
              </tr>
              <tr valign="top">
                <td colspan="2">
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>Anxiety</td>
                <td>Depression</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="6">
                  <bold>Experts</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Pain severity</td>
                <td>0.41<sup>e</sup></td>
                <td>0.18</td>
                <td>0.34<sup>f</sup></td>
                <td>0.44<sup>e</sup></td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Disability</td>
                <td>0.44<sup>e</sup></td>
                <td>0.19</td>
                <td>0.38<sup>f</sup></td>
                <td>0.46<sup>e</sup></td>
              </tr>
              <tr valign="top">
                <td colspan="6">
                  <bold>GPT-4 (temperature 0) Mean of 10 Trials</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Pain severity</td>
                <td>0.36<sup>f</sup></td>
                <td>0.17</td>
                <td>0.25</td>
                <td>0.35<sup>f</sup></td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Disability</td>
                <td>0.42<sup>e</sup></td>
                <td>0.21</td>
                <td>0.28</td>
                <td>0.36<sup>f</sup></td>
              </tr>
              <tr valign="top">
                <td colspan="6">
                  <bold>GPT-4 (temperature 1) Mean of 10 Trials</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Pain severity</td>
                <td>0.43<sup>e</sup></td>
                <td>0.18</td>
                <td>0.32<sup>f</sup></td>
                <td>0.41<sup>e</sup></td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Disability</td>
                <td>0.49<sup>e</sup></td>
                <td>0.16</td>
                <td>0.34<sup>f</sup></td>
                <td>0.45<sup>e</sup></td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table5fn1">
              <p><sup>a</sup>Significant correlations are indicated with <sup>e</sup><italic>P</italic>&#60;.01 and <sup>f</sup><italic>P</italic>&#60;.05.</p>
            </fn>
            <fn id="table5fn2">
              <p><sup>b</sup>FIQR: Revised Fibromyalgia Impact Questionnaire.</p>
            </fn>
            <fn id="table5fn3">
              <p><sup>c</sup>TSK: Tampa Scale of Kinesiophobia.</p>
            </fn>
            <fn id="table5fn4">
              <p><sup>d</sup>HADS: Hospital Anxiety and Depression Scale.</p>
            </fn>
            <fn id="table5fn5">
              <p><sup>e</sup><italic>P</italic>&#60;.01.</p>
            </fn>
            <fn id="table5fn6">
              <p><sup>f</sup><italic>P</italic>&#60;.05.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>Our preliminary study highlights the potential of using LLMs, such as GPT-4, for automating the assessment of pain severity and disability levels in patient WNs. WNs can be very useful for people’s pain assessment but are time-consuming for clinicians. The methodology based on LLMs presented in this paper conducts an automated assessment of the levels of pain severity and disability in the patient’s WNs. Our results indicate that experts in pain assessment can make use of LLMs for faster patient assessment. Indeed, the conducted analysis, bolstered by various statistical measures, reveals a significant resemblance between expert scores and those generated by GPT-4. This observation is further supported by the comparable correlation values observed between standardized measurements and assessments by both experts and GPT-4. Moreover, the positive reception from experts regarding the scores and explanations generated by GPT-4 underscores the potential applicability of automated systems in pain assessment. It is worth noting that both experts agree in their perceptions of GPT-4's scores and explanations, although one of them seems more positive in her assessments, leading to some variations in agreement indices.</p>
      </sec>
      <sec>
        <title>Limitations and Future Research</title>
        <p>While these findings are promising, some limitations are present that motivate further research to advance this area.</p>
        <p>First, in this study, we used pain severity and disability as main indicators of the texts, since we wanted to explore variables relevant in clinical context. International guidelines support measuring these in the pain field [<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref10">10</xref>]. Moving forward, it would be beneficial to explore additional variables beyond pain severity and disability that could enhance the clinical relevance of automated assessments. For example, future research could instruct LLMs to assess levels of catastrophizing thoughts in the texts. These are common patterns of thinking in people with pain, related to a worse adaptation to pain in multiple studies, for example, Quartana et al [<xref ref-type="bibr" rid="ref39">39</xref>]. Assessing this variable adequately would be very useful in the clinical context to identify people at risk of suffering more complex problems that need more attention.</p>
        <p>Second, our findings suggest the need for ongoing efforts to enhance the precision of GPT-4 scores in pain WN assessments (with special attention to the extreme scores). While the agreement between GPT-4-generated scores and expert ratings was generally favorable, there is room for improvement. We observe that GPT-4 has a slight tendency to overestimate the pain severity and disability of WNs, using scores lower than or equal to 5 less frequently than humans. Moreover, GPT-4 avoids using the highest score of 10. More research is needed to investigate if this pattern is observed with a greater sample of WNs or if it is linked to the sample used in this paper. If these patterns persist in larger studies, a fine-tuning strategy can be considered to alleviate this problem. For example, future research should analyze the use of few-shot learning methods to test the performance of GPT-4 or other LLMs in assessing pain WNs. By refining the training data and algorithms used by LLMs, we can strive to achieve even greater accuracy and reliability in automated pain WN assessments.</p>
        <p>Third, this study, due to its preliminary nature, focused on people with fibromyalgia. Future research would benefit from including people with different chronic pain problems, and bigger samples to compare the performance of GPT-4 in different pain conditions. Big differences are not expected, since we assume that the ability of GPT-4 to analyze WNs would not depend upon the specific pain problem. However, more empirical tests are needed to support this assumption and extend our methodology to other pain and health problems.</p>
        <p>Fourth, the use of GPT has some inherent limitations. For example, it may generate text that suffers from biases, hallucinations, and inconsistency [<xref ref-type="bibr" rid="ref40">40</xref>,<xref ref-type="bibr" rid="ref41">41</xref>]. For this reason, any tools that use GPT, such as the one we propose in this paper, should not be used as stand-alone tools; they always need human supervision. In our case, this should be supervised by clinicians. Additionally, future work could use the Quality Analysis of Medical Artificial Intelligence tool [<xref ref-type="bibr" rid="ref42">42</xref>], which provides a standardized way to evaluate medical AI output such as the output analyzed here. Future research should adopt this methodology when assessing the explanations given by LLMs about the assessment of the WNs. Finally, as previously explained, we asked for an explanation of the scores given by GPT-4 but this was not the case for the scores given by the experts in Serrat et al [<xref ref-type="bibr" rid="ref22">22</xref>]. In future research, it would be interesting to compare the explanations of scores provided by an LLM and the corresponding explanations provided by human experts. For example, a blind annotation can aim to evaluate if experts would be able to distinguish the origin of these explanations.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Example of the evaluation task given to the experts.</p>
        <media xlink:href="jmir_v27i1e65903_app1.pdf" xlink:title="PDF File  (Adobe PDF File), 101 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">AI</term>
          <def>
            <p>artificial intelligence</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">FIQR</term>
          <def>
            <p>Revised Fibromyalgia Impact Questionnaire</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">Gwet AC2</term>
          <def>
            <p>the weighted version of Gwet agreement coefficient</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">HADS</term>
          <def>
            <p>Hospital Anxiety and Depression Scale</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">IAA</term>
          <def>
            <p>interannotator agreement</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">LLM</term>
          <def>
            <p>large language model</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">MAE</term>
          <def>
            <p>mean absolute error</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">RMSE</term>
          <def>
            <p>root-mean-squared error</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">WN</term>
          <def>
            <p>written narrative</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>Universitat Oberta de Catalunya supported this study by covering the fees associated with the publication of this manuscript.</p>
    </ack>
    <notes>
      <title>Data Availability</title>
      <p>Nontextual data (expert and GPT scores) are included in this published article and <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>. The other materials generated or analyzed during this study are available from the corresponding author on reasonable request.</p>
    </notes>
    <fn-group>
      <fn fn-type="con">
        <p>AK, JA, JF, and RN conceived the idea and designed the study. KA and MS acted as experts for the evaluation tasks. JF built the code needed for the GPT tasks and performed the analyses. JA and RN created a first draft of the manuscript, which was reviewed and improved by all the authors.</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Johannes</surname>
              <given-names>CB</given-names>
            </name>
            <name name-style="western">
              <surname>Le</surname>
              <given-names>TK</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Johnston</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Dworkin</surname>
              <given-names>RH</given-names>
            </name>
          </person-group>
          <article-title>The prevalence of chronic pain in United States adults: results of an internet-based survey</article-title>
          <source>J Pain</source>
          <year>2010</year>
          <volume>11</volume>
          <issue>11</issue>
          <fpage>1230</fpage>
          <lpage>1239</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1526-5900(10)00601-2"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jpain.2010.07.002</pub-id>
          <pub-id pub-id-type="medline">20797916</pub-id>
          <pub-id pub-id-type="pii">S1526-5900(10)00601-2</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Leadley</surname>
              <given-names>RM</given-names>
            </name>
            <name name-style="western">
              <surname>Armstrong</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>YC</given-names>
            </name>
            <name name-style="western">
              <surname>Allen</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Kleijnen</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Chronic diseases in the European Union: the prevalence and health cost implications of chronic pain</article-title>
          <source>J Pain Palliat Care Pharmacother</source>
          <year>2012</year>
          <volume>26</volume>
          <issue>4</issue>
          <fpage>310</fpage>
          <lpage>325</lpage>
          <pub-id pub-id-type="doi">10.3109/15360288.2012.736933</pub-id>
          <pub-id pub-id-type="medline">23216170</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Breivik</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Collett</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Ventafridda</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Cohen</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Gallacher</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Survey of chronic pain in Europe: prevalence, impact on daily life, and treatment</article-title>
          <source>Eur J Pain</source>
          <year>2006</year>
          <volume>10</volume>
          <issue>4</issue>
          <fpage>287</fpage>
          <lpage>333</lpage>
          <pub-id pub-id-type="doi">10.1016/j.ejpain.2005.06.009</pub-id>
          <pub-id pub-id-type="medline">16095934</pub-id>
          <pub-id pub-id-type="pii">S1090-3801(05)00086-8</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rikard</surname>
              <given-names>SM</given-names>
            </name>
            <name name-style="western">
              <surname>Strahan</surname>
              <given-names>AE</given-names>
            </name>
            <name name-style="western">
              <surname>Schmit</surname>
              <given-names>KM</given-names>
            </name>
            <name name-style="western">
              <surname>Guy</surname>
              <given-names>GP</given-names>
            </name>
          </person-group>
          <article-title>Chronic pain among adults—United States, 2019–2021</article-title>
          <source>MMWR Morb Mortal Wkly Rep</source>
          <year>2023</year>
          <volume>72</volume>
          <issue>15</issue>
          <fpage>379</fpage>
          <lpage>385</lpage>
          <pub-id pub-id-type="doi">10.15585/mmwr.mm7215a1</pub-id>
          <pub-id pub-id-type="medline">37053114</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hooten</surname>
              <given-names>WM</given-names>
            </name>
          </person-group>
          <article-title>Chronic pain and mental health disorders: shared neural mechanisms, epidemiology, and treatment</article-title>
          <source>Mayo Clin Proc</source>
          <year>2016</year>
          <volume>91</volume>
          <issue>7</issue>
          <fpage>955</fpage>
          <lpage>970</lpage>
          <pub-id pub-id-type="doi">10.1016/j.mayocp.2016.04.029</pub-id>
          <pub-id pub-id-type="medline">27344405</pub-id>
          <pub-id pub-id-type="pii">S0025-6196(16)30182-3</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Blyth</surname>
              <given-names>FM</given-names>
            </name>
            <name name-style="western">
              <surname>March</surname>
              <given-names>LM</given-names>
            </name>
            <name name-style="western">
              <surname>Brnabic</surname>
              <given-names>AJM</given-names>
            </name>
            <name name-style="western">
              <surname>Cousins</surname>
              <given-names>MJ</given-names>
            </name>
          </person-group>
          <article-title>Chronic pain and frequent use of health care</article-title>
          <source>Pain</source>
          <year>2004</year>
          <volume>111</volume>
          <issue>1-2</issue>
          <fpage>51</fpage>
          <lpage>58</lpage>
          <pub-id pub-id-type="doi">10.1016/j.pain.2004.05.020</pub-id>
          <pub-id pub-id-type="medline">15327808</pub-id>
          <pub-id pub-id-type="pii">S030439590400274X</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cáceres-Matos</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Gil-García</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Vázquez-Santiago</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Cabrera-León</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>The use of healthcare services and disabling chronic pain: results from the cross-sectional population-based Andalusian Health Survey</article-title>
          <source>Eur J Public Health</source>
          <year>2024</year>
          <volume>34</volume>
          <issue>4</issue>
          <fpage>639</fpage>
          <lpage>645</lpage>
          <pub-id pub-id-type="doi">10.1093/eurpub/ckae079</pub-id>
          <pub-id pub-id-type="medline">38750626</pub-id>
          <pub-id pub-id-type="pii">7674906</pub-id>
          <pub-id pub-id-type="pmcid">PMC11293836</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Torralba</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Miquel</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Darba</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>[Current status of chronic pain in Spain: "Pain Proposal" initiative]</article-title>
          <source>Rev Soc Esp Dolor</source>
          <year>2014</year>
          <volume>21</volume>
          <issue>1</issue>
          <fpage>16</fpage>
          <lpage>22</lpage>
          <pub-id pub-id-type="doi">10.4321/s1134-80462014000100003</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Turk</surname>
              <given-names>DC</given-names>
            </name>
            <name name-style="western">
              <surname>Dworkin</surname>
              <given-names>RH</given-names>
            </name>
            <name name-style="western">
              <surname>Allen</surname>
              <given-names>RR</given-names>
            </name>
            <name name-style="western">
              <surname>Bellamy</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Brandenburg</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Carr</surname>
              <given-names>DB</given-names>
            </name>
            <name name-style="western">
              <surname>Cleeland</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Dionne</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Farrar</surname>
              <given-names>JT</given-names>
            </name>
            <name name-style="western">
              <surname>Galer</surname>
              <given-names>BS</given-names>
            </name>
            <name name-style="western">
              <surname>Hewitt</surname>
              <given-names>DJ</given-names>
            </name>
            <name name-style="western">
              <surname>Jadad</surname>
              <given-names>AR</given-names>
            </name>
            <name name-style="western">
              <surname>Katz</surname>
              <given-names>NP</given-names>
            </name>
            <name name-style="western">
              <surname>Kramer</surname>
              <given-names>LD</given-names>
            </name>
            <name name-style="western">
              <surname>Manning</surname>
              <given-names>DC</given-names>
            </name>
            <name name-style="western">
              <surname>McCormick</surname>
              <given-names>CG</given-names>
            </name>
            <name name-style="western">
              <surname>McDermott</surname>
              <given-names>MP</given-names>
            </name>
            <name name-style="western">
              <surname>McGrath</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Quessy</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Rappaport</surname>
              <given-names>BA</given-names>
            </name>
            <name name-style="western">
              <surname>Robinson</surname>
              <given-names>JP</given-names>
            </name>
            <name name-style="western">
              <surname>Royal</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Simon</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Stauffer</surname>
              <given-names>JW</given-names>
            </name>
            <name name-style="western">
              <surname>Stein</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Tollett</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Witter</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Core outcome domains for chronic pain clinical trials: IMMPACT recommendations</article-title>
          <source>Pain</source>
          <year>2003</year>
          <volume>106</volume>
          <issue>3</issue>
          <fpage>337</fpage>
          <lpage>345</lpage>
          <pub-id pub-id-type="doi">10.1016/j.pain.2003.08.001</pub-id>
          <pub-id pub-id-type="medline">14659516</pub-id>
          <pub-id pub-id-type="pii">00006396-200312000-00014</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dworkin</surname>
              <given-names>RH</given-names>
            </name>
            <name name-style="western">
              <surname>Turk</surname>
              <given-names>DC</given-names>
            </name>
            <name name-style="western">
              <surname>Farrar</surname>
              <given-names>JT</given-names>
            </name>
            <name name-style="western">
              <surname>Haythornthwaite</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Jensen</surname>
              <given-names>MP</given-names>
            </name>
            <name name-style="western">
              <surname>Katz</surname>
              <given-names>NP</given-names>
            </name>
            <name name-style="western">
              <surname>Kerns</surname>
              <given-names>RD</given-names>
            </name>
            <name name-style="western">
              <surname>Stucki</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Allen</surname>
              <given-names>RR</given-names>
            </name>
            <name name-style="western">
              <surname>Bellamy</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Carr</surname>
              <given-names>DB</given-names>
            </name>
            <name name-style="western">
              <surname>Chandler</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Cowan</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Dionne</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Galer</surname>
              <given-names>BS</given-names>
            </name>
            <name name-style="western">
              <surname>Hertz</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Jadad</surname>
              <given-names>AR</given-names>
            </name>
            <name name-style="western">
              <surname>Kramer</surname>
              <given-names>LD</given-names>
            </name>
            <name name-style="western">
              <surname>Manning</surname>
              <given-names>DC</given-names>
            </name>
            <name name-style="western">
              <surname>Martin</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>McCormick</surname>
              <given-names>CG</given-names>
            </name>
            <name name-style="western">
              <surname>McDermott</surname>
              <given-names>MP</given-names>
            </name>
            <name name-style="western">
              <surname>McGrath</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Quessy</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Rappaport</surname>
              <given-names>BA</given-names>
            </name>
            <name name-style="western">
              <surname>Robbins</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Robinson</surname>
              <given-names>JP</given-names>
            </name>
            <name name-style="western">
              <surname>Rothman</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Royal</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Simon</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Stauffer</surname>
              <given-names>JW</given-names>
            </name>
            <name name-style="western">
              <surname>Stein</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Tollett</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wernicke</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Witter</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Core outcome measures for chronic pain clinical trials: IMMPACT recommendations</article-title>
          <source>Pain</source>
          <year>2005</year>
          <volume>113</volume>
          <issue>1-2</issue>
          <fpage>9</fpage>
          <lpage>19</lpage>
          <pub-id pub-id-type="doi">10.1016/j.pain.2004.09.012</pub-id>
          <pub-id pub-id-type="medline">15621359</pub-id>
          <pub-id pub-id-type="pii">S0304-3959(04)00440-3</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Morse</surname>
              <given-names>JM</given-names>
            </name>
          </person-group>
          <article-title>Using qualitative methods to access the pain experience</article-title>
          <source>Br J Pain</source>
          <year>2015</year>
          <volume>9</volume>
          <issue>1</issue>
          <fpage>26</fpage>
          <lpage>31</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/26516553"/>
          </comment>
          <pub-id pub-id-type="doi">10.1177/2049463714550507</pub-id>
          <pub-id pub-id-type="medline">26516553</pub-id>
          <pub-id pub-id-type="pii">10.1177_2049463714550507</pub-id>
          <pub-id pub-id-type="pmcid">PMC4616988</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wideman</surname>
              <given-names>TH</given-names>
            </name>
            <name name-style="western">
              <surname>Edwards</surname>
              <given-names>RR</given-names>
            </name>
            <name name-style="western">
              <surname>Walton</surname>
              <given-names>DM</given-names>
            </name>
            <name name-style="western">
              <surname>Martel</surname>
              <given-names>MO</given-names>
            </name>
            <name name-style="western">
              <surname>Hudon</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Seminowicz</surname>
              <given-names>DA</given-names>
            </name>
          </person-group>
          <article-title>The multimodal assessment model of pain: a novel framework for further integrating the subjective pain experience within research and practice</article-title>
          <source>Clin J Pain</source>
          <year>2019</year>
          <volume>35</volume>
          <issue>3</issue>
          <fpage>212</fpage>
          <lpage>221</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/30444733"/>
          </comment>
          <pub-id pub-id-type="doi">10.1097/AJP.0000000000000670</pub-id>
          <pub-id pub-id-type="medline">30444733</pub-id>
          <pub-id pub-id-type="pmcid">PMC6382036</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="editor">
            <name name-style="western">
              <surname>Carr</surname>
              <given-names>DB</given-names>
            </name>
            <name name-style="western">
              <surname>Loeser</surname>
              <given-names>JD</given-names>
            </name>
            <name name-style="western">
              <surname>Morris</surname>
              <given-names>DB</given-names>
            </name>
          </person-group>
          <source>Narrative, Pain, and Suffering</source>
          <year>2005</year>
          <publisher-loc>Seattle, WA</publisher-loc>
          <publisher-name>IASP Press</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hall</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Powell</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Understanding the person through narrative</article-title>
          <source>Nurs Res Pract</source>
          <year>2011</year>
          <volume>2011</volume>
          <fpage>293837</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1155/2011/293837"/>
          </comment>
          <pub-id pub-id-type="doi">10.1155/2011/293837</pub-id>
          <pub-id pub-id-type="medline">21994820</pub-id>
          <pub-id pub-id-type="pmcid">PMC3169914</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Vindrola-Padros</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Johnson</surname>
              <given-names>GA</given-names>
            </name>
          </person-group>
          <article-title>The narrated, nonnarrated, and the disnarrated: conceptual tools for analyzing narratives in health services research</article-title>
          <source>Qual Health Res</source>
          <year>2014</year>
          <volume>24</volume>
          <issue>11</issue>
          <fpage>1603</fpage>
          <lpage>1611</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://journals.sagepub.com/doi/abs/10.1177/1049732314549019?url_ver=Z39.88-2003&#38;rfr_id=ori:rid:crossref.org&#38;rfr_dat=cr_pub%20%200pubmed"/>
          </comment>
          <pub-id pub-id-type="doi">10.1177/1049732314549019</pub-id>
          <pub-id pub-id-type="medline">25192757</pub-id>
          <pub-id pub-id-type="pii">1049732314549019</pub-id>
          <pub-id pub-id-type="pmcid">PMC4232312</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Noel</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Beals-Erickson</surname>
              <given-names>SE</given-names>
            </name>
            <name name-style="western">
              <surname>Law</surname>
              <given-names>EF</given-names>
            </name>
            <name name-style="western">
              <surname>Alberts</surname>
              <given-names>NM</given-names>
            </name>
            <name name-style="western">
              <surname>Palermo</surname>
              <given-names>TM</given-names>
            </name>
          </person-group>
          <article-title>Characterizing the pain narratives of parents of youth with chronic pain</article-title>
          <source>Clin J Pain</source>
          <year>2016</year>
          <volume>32</volume>
          <issue>10</issue>
          <fpage>849</fpage>
          <lpage>858</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/26736026"/>
          </comment>
          <pub-id pub-id-type="doi">10.1097/AJP.0000000000000346</pub-id>
          <pub-id pub-id-type="medline">26736026</pub-id>
          <pub-id pub-id-type="pmcid">PMC4935638</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Meldrum</surname>
              <given-names>ML</given-names>
            </name>
            <name name-style="western">
              <surname>Tsao</surname>
              <given-names>JC</given-names>
            </name>
            <name name-style="western">
              <surname>Zeltzer</surname>
              <given-names>LK</given-names>
            </name>
          </person-group>
          <article-title>"I can't be what I want to be": Children's narratives of chronic pain experiences and treatment outcomes</article-title>
          <source>Pain Med</source>
          <year>2009</year>
          <volume>10</volume>
          <issue>6</issue>
          <fpage>1018</fpage>
          <lpage>1034</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/19594848"/>
          </comment>
          <pub-id pub-id-type="doi">10.1111/j.1526-4637.2009.00650.x</pub-id>
          <pub-id pub-id-type="medline">19594848</pub-id>
          <pub-id pub-id-type="pii">PME650</pub-id>
          <pub-id pub-id-type="pmcid">PMC2758095</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>McGowan</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Luker</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Creed</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Chew-Graham</surname>
              <given-names>CA</given-names>
            </name>
          </person-group>
          <article-title>How do you explain a pain that can't be seen?: The narratives of women with chronic pelvic pain and their disengagement with the diagnostic cycle</article-title>
          <source>Br J Health Psychol</source>
          <year>2007</year>
          <volume>12</volume>
          <issue>Pt 2</issue>
          <fpage>261</fpage>
          <lpage>274</lpage>
          <pub-id pub-id-type="doi">10.1348/135910706X104076</pub-id>
          <pub-id pub-id-type="medline">17456285</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dysvik</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Natvig</surname>
              <given-names>GK</given-names>
            </name>
            <name name-style="western">
              <surname>Furnes</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>A narrative approach to explore grief experiences and treatment adherence in people with chronic pain after participation in a pain-management program: a 6-year follow-up study</article-title>
          <source>Patient Prefer Adherence</source>
          <year>2013</year>
          <volume>7</volume>
          <fpage>751</fpage>
          <lpage>759</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/23990710"/>
          </comment>
          <pub-id pub-id-type="doi">10.2147/PPA.S46272</pub-id>
          <pub-id pub-id-type="medline">23990710</pub-id>
          <pub-id pub-id-type="pii">ppa-7-751</pub-id>
          <pub-id pub-id-type="pmcid">PMC3749063</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nieto</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Sora</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Boixadós</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Ruiz</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Understanding the experience of functional abdominal pain through written narratives by families</article-title>
          <source>Pain Med</source>
          <year>2020</year>
          <volume>21</volume>
          <issue>6</issue>
          <fpage>1093</fpage>
          <lpage>1105</lpage>
          <pub-id pub-id-type="doi">10.1093/pm/pnz147</pub-id>
          <pub-id pub-id-type="medline">31361016</pub-id>
          <pub-id pub-id-type="pii">5540706</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sora</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Nieto</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Vall-Roqué</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Conesa</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Pérez-Navarro</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Saigí-Rubió</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Chronic neck and low back pain from personal experiences: a written narrative approach</article-title>
          <source>Pain Manag</source>
          <year>2024</year>
          <volume>14</volume>
          <issue>4</issue>
          <fpage>183</fpage>
          <lpage>194</lpage>
          <pub-id pub-id-type="doi">10.1080/17581869.2024.2343648</pub-id>
          <pub-id pub-id-type="medline">38717373</pub-id>
          <pub-id pub-id-type="pmcid">PMC11229442</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Serrat</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Sora</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Ureña</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Vall-Roqué</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Edo-Gual</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Nieto</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Written narratives to understand the experience of individuals living with fibromyalgia</article-title>
          <source>Musculoskeletal Care</source>
          <year>2024</year>
          <volume>22</volume>
          <issue>2</issue>
          <fpage>e1905</fpage>
          <pub-id pub-id-type="doi">10.1002/msc.1905</pub-id>
          <pub-id pub-id-type="medline">39031673</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kahtan</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Jordan</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Forget</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Is pain ever acceptable? A qualitative exploration concerning adult perceptions of chronic pain</article-title>
          <source>Eur J Pain</source>
          <year>2024</year>
          <volume>28</volume>
          <issue>7</issue>
          <fpage>1213</fpage>
          <lpage>1225</lpage>
          <pub-id pub-id-type="doi">10.1002/ejp.2255</pub-id>
          <pub-id pub-id-type="medline">38400800</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pennebaker</surname>
              <given-names>JW</given-names>
            </name>
            <name name-style="western">
              <surname>Seagal</surname>
              <given-names>JD</given-names>
            </name>
          </person-group>
          <article-title>Forming a story: the health benefits of narrative</article-title>
          <source>J Clin Psychol</source>
          <year>1999</year>
          <volume>55</volume>
          <issue>10</issue>
          <fpage>1243</fpage>
          <lpage>1254</lpage>
          <pub-id pub-id-type="doi">10.1002/(SICI)1097-4679(199910)55:10&#60;1243::AID-JCLP6&#62;3.0.CO;2-N</pub-id>
          <pub-id pub-id-type="medline">11045774</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Abd-Elsayed</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Robinson</surname>
              <given-names>CL</given-names>
            </name>
            <name name-style="western">
              <surname>Marshall</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Diwan</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Peters</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Applications of artificial intelligence in pain medicine</article-title>
          <source>Curr Pain Headache Rep</source>
          <year>2024</year>
          <volume>28</volume>
          <issue>4</issue>
          <fpage>229</fpage>
          <lpage>238</lpage>
          <pub-id pub-id-type="doi">10.1007/s11916-024-01224-8</pub-id>
          <pub-id pub-id-type="medline">38345695</pub-id>
          <pub-id pub-id-type="pii">10.1007/s11916-024-01224-8</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Vaid</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Landi</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Nadkarni</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Nabeel</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>Using fine-tuned large language models to parse clinical notes in musculoskeletal pain disorders</article-title>
          <source>Lancet Digit Health</source>
          <year>2023</year>
          <volume>5</volume>
          <issue>12</issue>
          <fpage>e855</fpage>
          <lpage>e858</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S2589-7500(23)00202-9"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/S2589-7500(23)00202-9</pub-id>
          <pub-id pub-id-type="medline">39492289</pub-id>
          <pub-id pub-id-type="pii">S2589-7500(23)00202-9</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Shrestha</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Shen</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Zaidat</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Duey</surname>
              <given-names>AH</given-names>
            </name>
            <name name-style="western">
              <surname>Tang</surname>
              <given-names>JE</given-names>
            </name>
            <name name-style="western">
              <surname>Ahmed</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Hoang</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Restrepo Mejia</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Rajjoub</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Markowitz</surname>
              <given-names>JS</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>JS</given-names>
            </name>
            <name name-style="western">
              <surname>Cho</surname>
              <given-names>SK</given-names>
            </name>
          </person-group>
          <article-title>Performance of ChatGPT on NASS clinical guidelines for the diagnosis and treatment of low back pain: a comparison study</article-title>
          <source>Spine</source>
          <year>2024</year>
          <volume>49</volume>
          <issue>9</issue>
          <fpage>640</fpage>
          <lpage>651</lpage>
          <pub-id pub-id-type="doi">10.1097/BRS.0000000000004915</pub-id>
          <pub-id pub-id-type="medline">38213186</pub-id>
          <pub-id pub-id-type="pii">00007632-990000000-00555</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gianola</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Bargeri</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Castellini</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Cook</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Palese</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Pillastrini</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Salvalaggio</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Turolla</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Rossettini</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Performance of ChatGPT compared to clinical practice guidelines in making informed decisions for lumbosacral radicular pain: a cross-sectional study</article-title>
          <source>J Orthop Sports Phys Ther</source>
          <year>2024</year>
          <volume>54</volume>
          <issue>3</issue>
          <fpage>222</fpage>
          <lpage>228</lpage>
          <pub-id pub-id-type="doi">10.2519/jospt.2024.12151</pub-id>
          <pub-id pub-id-type="medline">38284363</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sallam</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>ChatGPT utility in healthcare education, research, and practice: systematic review on the promising perspectives and valid concerns</article-title>
          <source>Healthcare</source>
          <year>2023</year>
          <volume>11</volume>
          <issue>6</issue>
          <fpage>887</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.mdpi.com/resolver?pii=healthcare11060887"/>
          </comment>
          <pub-id pub-id-type="doi">10.3390/healthcare11060887</pub-id>
          <pub-id pub-id-type="medline">36981544</pub-id>
          <pub-id pub-id-type="pii">healthcare11060887</pub-id>
          <pub-id pub-id-type="pmcid">PMC10048148</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gwet</surname>
              <given-names>KL</given-names>
            </name>
          </person-group>
          <source>Handbook of Inter-Rater Reliability: The Definitive Guide to Measuring the Extent of Agreement Among Raters</source>
          <year>2014</year>
          <publisher-loc>Gaithersburg, MD</publisher-loc>
          <publisher-name>Advanced Analytics, LLC</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Krippendorff</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Measuring the reliability of qualitative text analysis data</article-title>
          <source>Qual Quant</source>
          <year>2004</year>
          <volume>38</volume>
          <fpage>787</fpage>
          <lpage>800</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1007/s11135-004-8107-7"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s11135-004-8107-7</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Goh</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Gallo</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Hom</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Strong</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Weng</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Kerman</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Cool</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Kanjee</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Parsons</surname>
              <given-names>AS</given-names>
            </name>
            <name name-style="western">
              <surname>Ahuja</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Horvitz</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Milstein</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Olson</surname>
              <given-names>APJ</given-names>
            </name>
            <name name-style="western">
              <surname>Rodman</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>JH</given-names>
            </name>
          </person-group>
          <article-title>Large language model influence on diagnostic reasoning: a randomized clinical trial</article-title>
          <source>JAMA Netw Open</source>
          <year>2024</year>
          <volume>7</volume>
          <issue>10</issue>
          <fpage>e2440969</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://jamanetwork.com/journals/jamanetworkopen/fullarticle/10.1001/jamanetworkopen.2024.40969"/>
          </comment>
          <pub-id pub-id-type="doi">10.1001/jamanetworkopen.2024.40969</pub-id>
          <pub-id pub-id-type="medline">39466245</pub-id>
          <pub-id pub-id-type="pii">2825395</pub-id>
          <pub-id pub-id-type="pmcid">PMC11519755</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bennett</surname>
              <given-names>RM</given-names>
            </name>
            <name name-style="western">
              <surname>Friend</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Jones</surname>
              <given-names>KD</given-names>
            </name>
            <name name-style="western">
              <surname>Ward</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Han</surname>
              <given-names>BK</given-names>
            </name>
            <name name-style="western">
              <surname>Ross</surname>
              <given-names>RL</given-names>
            </name>
          </person-group>
          <article-title>The Revised Fibromyalgia Impact Questionnaire (FIQR): validation and psychometric properties</article-title>
          <source>Arthritis Res Ther</source>
          <year>2009</year>
          <volume>11</volume>
          <issue>4</issue>
          <fpage>R120</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arthritis-research.biomedcentral.com/articles/10.1186/ar2783"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/ar2783</pub-id>
          <pub-id pub-id-type="medline">19664287</pub-id>
          <pub-id pub-id-type="pii">ar2783</pub-id>
          <pub-id pub-id-type="pmcid">PMC2745803</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Luciano</surname>
              <given-names>JV</given-names>
            </name>
            <name name-style="western">
              <surname>Aguado</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Serrano-Blanco</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Calandre</surname>
              <given-names>EP</given-names>
            </name>
            <name name-style="western">
              <surname>Rodriguez-Lopez</surname>
              <given-names>CM</given-names>
            </name>
          </person-group>
          <article-title>Dimensionality, reliability, and validity of the Revised Fibromyalgia Impact Questionnaire in two Spanish samples</article-title>
          <source>Arthritis Care Res</source>
          <year>2013</year>
          <volume>65</volume>
          <issue>10</issue>
          <fpage>1682</fpage>
          <lpage>1689</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://onlinelibrary.wiley.com/doi/10.1002/acr.22034"/>
          </comment>
          <pub-id pub-id-type="doi">10.1002/acr.22034</pub-id>
          <pub-id pub-id-type="medline">23609980</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Luciano</surname>
              <given-names>JV</given-names>
            </name>
            <name name-style="western">
              <surname>D'Amico</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Cerdà-Lafont</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Peñarrubia-María</surname>
              <given-names>MT</given-names>
            </name>
            <name name-style="western">
              <surname>Knapp</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Cuesta-Vargas</surname>
              <given-names>AI</given-names>
            </name>
            <name name-style="western">
              <surname>Serrano-Blanco</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>García-Campayo</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Cost-utility of cognitive behavioral therapy versus U.S. Food and Drug Administration recommended drugs and usual care in the treatment of patients with fibromyalgia: an economic evaluation alongside a 6-month randomized controlled trial</article-title>
          <source>Arthritis Res Ther</source>
          <year>2014</year>
          <volume>16</volume>
          <issue>5</issue>
          <fpage>451</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arthritis-research.biomedcentral.com/articles/10.1186/s13075-014-0451-y"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s13075-014-0451-y</pub-id>
          <pub-id pub-id-type="medline">25270426</pub-id>
          <pub-id pub-id-type="pii">s13075-014-0451-y</pub-id>
          <pub-id pub-id-type="pmcid">PMC4203881</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gómez-Pérez</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>López-Martínez</surname>
              <given-names>AE</given-names>
            </name>
            <name name-style="western">
              <surname>Ruiz-Párraga</surname>
              <given-names>GT</given-names>
            </name>
          </person-group>
          <article-title>Psychometric properties of the Spanish version of the Tampa Scale for Kinesiophobia (TSK)</article-title>
          <source>J Pain</source>
          <year>2011</year>
          <volume>12</volume>
          <issue>4</issue>
          <fpage>425</fpage>
          <lpage>435</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1526-5900(10)00695-4"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jpain.2010.08.004</pub-id>
          <pub-id pub-id-type="medline">20926355</pub-id>
          <pub-id pub-id-type="pii">S1526-5900(10)00695-4</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Artstein</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Poesio</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Inter-coder agreement for computational linguistics</article-title>
          <source>Comput Linguist</source>
          <year>2008</year>
          <volume>34</volume>
          <fpage>555</fpage>
          <lpage>596</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1162/coli.07-034-R2"/>
          </comment>
          <pub-id pub-id-type="doi">10.1162/coli.07-034-r2</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gwet</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Fergadis</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Chance-corrected agreement coefficients</article-title>
          <source>irrCAC</source>
          <access-date>2025-03-20</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://irrcac.readthedocs.io/en/latest/index.html">https://irrcac.readthedocs.io/en/latest/index.html</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Quartana</surname>
              <given-names>PJ</given-names>
            </name>
            <name name-style="western">
              <surname>Campbell</surname>
              <given-names>CM</given-names>
            </name>
            <name name-style="western">
              <surname>Edwards</surname>
              <given-names>RR</given-names>
            </name>
          </person-group>
          <article-title>Pain catastrophizing: a critical review</article-title>
          <source>Expert Rev Neurother</source>
          <year>2009</year>
          <volume>9</volume>
          <issue>5</issue>
          <fpage>745</fpage>
          <lpage>758</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/19402782"/>
          </comment>
          <pub-id pub-id-type="doi">10.1586/ern.09.34</pub-id>
          <pub-id pub-id-type="medline">19402782</pub-id>
          <pub-id pub-id-type="pmcid">PMC2696024</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ji</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Ishii</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Fung</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <person-group person-group-type="editor">
            <name name-style="western">
              <surname>Bouamor</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Pino</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Bali</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Towards mitigating LLM hallucination via self reflection</article-title>
          <source>Findings of the Association for Computational Linguistics: EMNLP 2023</source>
          <year>2023</year>
          <publisher-loc>Singapore</publisher-loc>
          <publisher-name>Association for Computational Linguistics</publisher-name>
          <fpage>1827</fpage>
          <lpage>1843</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fulgu</surname>
              <given-names>RA</given-names>
            </name>
            <name name-style="western">
              <surname>Capraro</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>Surprising gender biases in GPT</article-title>
          <source>Comput Hum Behav Rep</source>
          <year>2024</year>
          <volume>16</volume>
          <fpage>100533</fpage>
          <pub-id pub-id-type="doi">10.1016/j.chbr.2024.100533</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Vaira</surname>
              <given-names>LA</given-names>
            </name>
            <name name-style="western">
              <surname>Lechien</surname>
              <given-names>JR</given-names>
            </name>
            <name name-style="western">
              <surname>Abbate</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Allevi</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Audino</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Beltramini</surname>
              <given-names>GA</given-names>
            </name>
            <name name-style="western">
              <surname>Bergonzani</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Boscolo-Rizzo</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Califano</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Cammaroto</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Chiesa-Estomba</surname>
              <given-names>CM</given-names>
            </name>
            <name name-style="western">
              <surname>Committeri</surname>
              <given-names>U</given-names>
            </name>
            <name name-style="western">
              <surname>Crimi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Curran</surname>
              <given-names>NR</given-names>
            </name>
            <name name-style="western">
              <surname>di Bello</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>di Stadio</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Frosolini</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Gabriele</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Gengler</surname>
              <given-names>IM</given-names>
            </name>
            <name name-style="western">
              <surname>Lonardi</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Maglitto</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Mayo-Yáñez</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Petrocelli</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Pucci</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Saibene</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Saponaro</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Tel</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Trabalzini</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Trecca</surname>
              <given-names>EMC</given-names>
            </name>
            <name name-style="western">
              <surname>Vellone</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Salzano</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>De Riu</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Validation of the Quality Analysis of Medical Artificial Intelligence (QAMAI) tool: a new tool to assess the quality of health information provided by AI platforms</article-title>
          <source>Eur Arch Otorhinolaryngol</source>
          <year>2024</year>
          <volume>281</volume>
          <issue>11</issue>
          <fpage>6123</fpage>
          <lpage>6131</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://air.unimi.it/handle/2434/1048609"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s00405-024-08710-0</pub-id>
          <pub-id pub-id-type="medline">38703195</pub-id>
          <pub-id pub-id-type="pii">10.1007/s00405-024-08710-0</pub-id>
          <pub-id pub-id-type="pmcid">PMC11512889</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
