<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMIR</journal-id>
      <journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id>
      <journal-title>Journal of Medical Internet Research</journal-title>
      <issn pub-type="epub">1438-8871</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v26i1e56500</article-id>
      <article-id pub-id-type="pmid">39167785</article-id>
      <article-id pub-id-type="doi">10.2196/56500</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Comparing GPT-4 and Human Researchers in Health Care Data Analysis: Qualitative Description Study</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>de Azevedo Cardoso</surname>
            <given-names>Taiane</given-names>
          </name>
        </contrib>
        <contrib contrib-type="editor">
          <name>
            <surname>Jin</surname>
            <given-names>Qiao</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Zhu</surname>
            <given-names>Lingxuan</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Rony</surname>
            <given-names>Moustaq Karim Khan</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Zhong</surname>
            <given-names>Yaping</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Li</surname>
            <given-names>Kevin Danis</given-names>
          </name>
          <degrees>BS</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Department of Urology</institution>
            <institution>University of California San Francisco</institution>
            <addr-line>400 Parnassus Ave</addr-line>
            <addr-line>San Francisco, CA</addr-line>
            <country>United States</country>
            <phone>1 415 353 2200</phone>
            <email>kevin.d.li@ucsf.edu</email>
          </address>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-0550-0250</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Fernandez</surname>
            <given-names>Adrian M</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-9918-5783</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Schwartz</surname>
            <given-names>Rachel</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-8125-9492</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Rios</surname>
            <given-names>Natalie</given-names>
          </name>
          <degrees>BS</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-2583-1241</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Carlisle</surname>
            <given-names>Marvin Nathaniel</given-names>
          </name>
          <degrees>BS</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0007-6467-0758</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author">
          <name name-style="western">
            <surname>Amend</surname>
            <given-names>Gregory M</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff5" ref-type="aff">5</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-5843-8641</ext-link>
        </contrib>
        <contrib id="contrib7" contrib-type="author">
          <name name-style="western">
            <surname>Patel</surname>
            <given-names>Hiren V</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-6475-670X</ext-link>
        </contrib>
        <contrib id="contrib8" contrib-type="author">
          <name name-style="western">
            <surname>Breyer</surname>
            <given-names>Benjamin N</given-names>
          </name>
          <degrees>MAS, MD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-0532-048X</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Department of Urology</institution>
        <institution>University of California San Francisco</institution>
        <addr-line>San Francisco, CA</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Department of Epidemiology and Biostatistics</institution>
        <institution>University of California San Francisco</institution>
        <addr-line>San Francisco, CA</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Department of Anesthesia and Perioperative Care</institution>
        <institution>University of California San Francisco</institution>
        <addr-line>San Francisco, CA</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff4">
        <label>4</label>
        <institution>Division of General Internal Medicine, Department of Medicine</institution>
        <institution>University of California San Francisco</institution>
        <addr-line>San Francisco, CA</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff5">
        <label>5</label>
        <institution>Department of Urology</institution>
        <institution>Icahn School of Medicine at Mount Sinai</institution>
        <addr-line>New York, NY</addr-line>
        <country>United States</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Kevin Danis Li <email>kevin.d.li@ucsf.edu</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <year>2024</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>21</day>
        <month>8</month>
        <year>2024</year>
      </pub-date>
      <volume>26</volume>
      <elocation-id>e56500</elocation-id>
      <history>
        <date date-type="received">
          <day>17</day>
          <month>1</month>
          <year>2024</year>
        </date>
        <date date-type="rev-request">
          <day>14</day>
          <month>5</month>
          <year>2024</year>
        </date>
        <date date-type="rev-recd">
          <day>31</day>
          <month>5</month>
          <year>2024</year>
        </date>
        <date date-type="accepted">
          <day>9</day>
          <month>7</month>
          <year>2024</year>
        </date>
      </history>
      <copyright-statement>©Kevin Danis Li, Adrian M Fernandez, Rachel Schwartz, Natalie Rios, Marvin Nathaniel Carlisle, Gregory M Amend, Hiren V Patel, Benjamin N Breyer. Originally published in the Journal of Medical Internet Research (https://www.jmir.org), 21.08.2024.</copyright-statement>
      <copyright-year>2024</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research (ISSN 1438-8871), is properly cited. The complete bibliographic information, a link to the original publication on https://www.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://www.jmir.org/2024/1/e56500" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Large language models including GPT-4 (OpenAI) have opened new avenues in health care and qualitative research. Traditional qualitative methods are time-consuming and require expertise to capture nuance. Although large language models have demonstrated enhanced contextual understanding and inferencing compared with traditional natural language processing, their performance in qualitative analysis versus that of humans remains unexplored.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>We evaluated the effectiveness of GPT-4 versus human researchers in qualitative analysis of interviews with patients with adult-acquired buried penis (AABP).</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>Qualitative data were obtained from semistructured interviews with 20 patients with AABP. Human analysis involved a structured 3-stage process—initial observations, line-by-line coding, and consensus discussions to refine themes. In contrast, artificial intelligence (AI) analysis with GPT-4 underwent two phases: (1) a naïve phase, where GPT-4 outputs were independently evaluated by a blinded reviewer to identify themes and subthemes and (2) a comparison phase, where AI-generated themes were compared with human-identified themes to assess agreement. We used a general qualitative description approach.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>The study population (N=20) comprised predominantly White (17/20, 85%), married (12/20, 60%), heterosexual (19/20, 95%) men, with a mean age of 58.8 years and BMI of 41.1 kg/m<sup>2</sup>. Human qualitative analysis identified “urinary issues” in 95% (19/20) and GPT-4 in 75% (15/20) of interviews, with the subtheme “spray or stream” noted in 60% (12/20) and 35% (7/20), respectively. “Sexual issues” were prominent (19/20, 95% humans vs 16/20, 80% GPT-4), although humans identified a wider range of subthemes, including “pain with sex or masturbation” (7/20, 35%) and “difficulty with sex or masturbation” (4/20, 20%). Both analyses similarly highlighted “mental health issues” (11/20, 55%, both), although humans coded “depression” more frequently (10/20, 50% humans vs 4/20, 20% GPT-4). Humans frequently cited “issues using public restrooms” (12/20, 60%) as impacting social life, whereas GPT-4 emphasized “struggles with romantic relationships” (9/20, 45%). “Hygiene issues” were consistently recognized (14/20, 70% humans vs 13/20, 65% GPT-4). Humans uniquely identified “contributing factors” as a theme in all interviews. There was moderate agreement between human and GPT-4 coding (κ=0.401). Reliability assessments of GPT-4’s analyses showed consistent coding for themes including “body image struggles,” “chronic pain” (10/10, 100%), and “depression” (9/10, 90%). Other themes like “motivation for surgery” and “weight challenges” were reliably coded (8/10, 80%), while less frequent themes were variably identified across multiple iterations.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>Large language models including GPT-4 can effectively identify key themes in analyzing qualitative health care data, showing moderate agreement with human analysis. While human analysis provided a richer diversity of subthemes, the consistency of AI suggests its use as a complementary tool in qualitative research. With AI rapidly advancing, future studies should iterate analyses and circumvent token limitations by segmenting data, furthering the breadth and depth of large language model–driven qualitative analyses.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>artificial intelligence</kwd>
        <kwd>ChatGPT</kwd>
        <kwd>large language models</kwd>
        <kwd>qualitative analysis</kwd>
        <kwd>content analysis</kwd>
        <kwd>buried penis</kwd>
        <kwd>qualitative interviews</kwd>
        <kwd>qualitative description</kwd>
        <kwd>urology</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>Recent advancements in artificial intelligence (AI), particularly in large language models, have significantly expanded their applications in health care and academic research. These developments raise critical questions about their potential and ethical use [<xref ref-type="bibr" rid="ref1">1</xref>-<xref ref-type="bibr" rid="ref3">3</xref>]. GPT-4, developed by OpenAI, is a large language model that uses deep learning algorithms, specifically the generative pretrained transformer (GPT) architecture, to process and generate human-like text [<xref ref-type="bibr" rid="ref4">4</xref>]. Its training on diverse internet text sources through unsupervised learning enables it to interpret complex language data, making it a potentially invaluable tool for qualitative research [<xref ref-type="bibr" rid="ref5">5</xref>]. This is especially important in areas where traditional qualitative data analysis is labor-intensive and requires expertise to understand subtle nuances [<xref ref-type="bibr" rid="ref6">6</xref>]. Furthermore, it is unknown how AI-driven qualitative analysis may differ from human-driven analysis in research contexts.</p>
      <p>Despite its potential, the application of AI and large language models to qualitative data remains underexplored [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref8">8</xref>]. Previous studies in the realm of qualitative data analysis have used traditional natural language processing (NLP) models, which often require benchmark-specific training and hand engineering, leading to more constrained contextual understanding and inferencing abilities. For example, Lennon et al [<xref ref-type="bibr" rid="ref9">9</xref>] combined human coding with an NLP system trained on internal data, significantly reducing coding time, while Cheligeer et al [<xref ref-type="bibr" rid="ref10">10</xref>] used a model based on BERT (Bidirectional Encoder Representations from Transformers; Google) for faster keyword analysis. However, such models fall short of the advanced contextual and inferencing abilities exhibited by widely trained large language models like GPT-4, which has been shown to outperform traditional systems on standard NLP benchmarks [<xref ref-type="bibr" rid="ref11">11</xref>]. Although the field is rapidly evolving, there remains a limited number of studies that directly compare AI-driven qualitative analysis with human-driven approaches [<xref ref-type="bibr" rid="ref12">12</xref>-<xref ref-type="bibr" rid="ref17">17</xref>].</p>
      <p>In this study, we used GPT-4 to re-examine qualitative data from a previously published study of 20 patients with adult-acquired buried penis (AABP), a urological condition with significant psychosocial consequences, and compare its performance with that of human researchers [<xref ref-type="bibr" rid="ref18">18</xref>]. Evaluating GPT-4 for qualitative analysis in this patient population is particularly important due to the unique and profound psychosocial distress associated with AABP, including issues related to body image, sexual function, and mental health. Examining patients’ accounts through qualitative analysis can provide an increased understanding of their lived experiences. To accomplish these objectives, we created a series of generalizable prompts that allow the application of GPT-4 to qualitative analysis without requiring specialized knowledge or skills [<xref ref-type="bibr" rid="ref19">19</xref>]. Finally, we evaluated the validity of our approach by measuring agreement between GPT-4 and human analysis and reliability by assessing if prompts consistently elicited similar outputs from the same data.</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Data Source</title>
        <p>Qualitative data were from a convenience sample of 20 patients who presented to urology clinics participating in TURNS (Trauma and Urologic Reconstructive Network of Surgeons), a multi-institutional collaborative research group focused on urologic trauma and reconstruction [<xref ref-type="bibr" rid="ref18">18</xref>]. We conducted semistructured interviews focusing on the impact of AABP on personal relationships, social life, mental health, and physical health. Participants were interviewed for 15 to 30 minutes, and audio was transcribed electronically using Otter transcription software [<xref ref-type="bibr" rid="ref20">20</xref>]. Interviews were conducted over Zoom live video conferencing [<xref ref-type="bibr" rid="ref21">21</xref>]. For both human and GPT-4 qualitative analyses, only deidentified text transcripts were used, ensuring that the qualitative data were interpreted solely from text, providing a comparable basis for both human and AI-driven analyses.</p>
      </sec>
      <sec>
        <title>Human Analysis</title>
        <p>Our human-driven analysis used a general qualitative description approach which differs from other qualitative methods in that the analytic process stays close to the data, describing informants’ experiences using their own language [<xref ref-type="bibr" rid="ref22">22</xref>-<xref ref-type="bibr" rid="ref24">24</xref>]. The research team initially reviewed interview transcripts, taking notes to capture observations and ideas and facilitate a comprehensive understanding of the overall content. This preparatory work informed the subsequent structured coding process. To ensure consistency and reliability, the team convened at three key stages, which were (1) before coding, to share initial text impressions and establish a standardized coding protocol; (2) after initiating line-by-line coding, to discuss applied codes and refine categorization strategies; and (3) to assess coder interrater reliability using weighted Fleiss κ coefficients [<xref ref-type="bibr" rid="ref25">25</xref>]. Codes with a κ value below 0.75 were discussed among all authors until a coding consensus was reached. This approach enabled the identification and categorization of relevant subthemes and themes.</p>
      </sec>
      <sec>
        <title>AI Analysis</title>
        <p>Each deidentified transcript underwent text formatting removal before analysis by GPT-4 using a standardized prompt set (<xref rid="figure1" ref-type="fig">Figure 1</xref>) [<xref ref-type="bibr" rid="ref26">26</xref>]. The analysis of the GPT-4–generated output was conducted in 2 phases, the naïve phase and the comparison phase.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Procedure for using GPT-4 for qualitative description.</p>
          </caption>
          <graphic xlink:href="jmir_v26i1e56500_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>In the naïve phase, GPT-4’s outputs for each interview were examined to extract relevant codes and quotes. These were then combined into subthemes, with groupings based on conceptual coherence and content relevance, following a standard qualitative description process [<xref ref-type="bibr" rid="ref24">24</xref>]. Subsequently, similar subthemes were grouped to form overarching themes. Multiple iterations were conducted to refine the subthemes before synthesizing generalizations that held true across the data. Memo writing was integral to this process, capturing the evolving understanding of the data. Importantly, no discussions with the human-analyst team were conducted during this phase to avoid biasing the process. All interactions and evaluations of GPT-4’s analyses were conducted by a blinded reviewer (KDL) who was not involved in the initial human-driven analysis and kept naïve to its outcomes.</p>
        <p>In the comparison phase, AI-identified subthemes and themes were compared against those previously identified through human-driven analysis. This phase focused on identifying parallels and alignments between the 2 analyses to provide a direct comparison.</p>
        <p>Interview data were collected in 2021, and human analyses were completed by 2022. All GPT-4 analyses were processed in separate instances on December 1, 2023, using the latest model of GPT-4 available at that time.</p>
      </sec>
      <sec>
        <title>Measures to Ensure Rigor</title>
        <p>The analytic team included KDL, who is a medical and data science master’s student, NR, who is a clinical research coordinator with extensive experience in managing and coordinating clinical studies in health care settings, and GMA, who is a fellowship-trained surgeon specializing in urologic conditions, including adult-acquired buried penis. In addition, we consulted BNB, an expert in urologic reconstruction who frequently treats patients with buried penis, to provide in-depth clinical insights and ensure the medical accuracy of our interpretations, and RS, a health services researcher and communication scientist with expertise in qualitative methods, to guide us on appropriate methodologies and ensure the rigor of our analyses.</p>
        <p>To ensure rigor, we implemented several strategies addressing credibility, transferability, dependability, and confirmability [<xref ref-type="bibr" rid="ref27">27</xref>]. For credibility, we built patient rapport through prolonged engagement, as most patients had existing longitudinal relationships at the urology clinics where they received care, allowing for deeper insights into their experiences. For transferability, we reported clinical characteristics of the study participants to inform the applicability of our findings to other populations with AABP and used a multi-institutional sampling strategy to account for potential geographic or local institutional characteristics, ensuring broader applicability of our results.</p>
        <p>Dependability was ensured through methodological documentation, where all codes, subthemes, and themes were documented at each step to provide transparency and replicability of our coding decisions. We also maintained detailed audit trails of raw outputs from GPT-4, processed outputs, and the subsequent organization into subthemes and themes, which the team reviewed to ensure consistency and reliability. Confirmability was achieved by having BNB, an expert in urologic reconstruction, review the study findings and provide critical insights during the design phase, and RS, who provided qualitative methodological support. In addition, data were shared with the entire research team, and feedback from all coauthors was incorporated into subsequent interpretation and analysis.</p>
      </sec>
      <sec>
        <title>Comparison of Analyses</title>
        <p>Qualitative analyses, including themes and subthemes, were summarized using descriptive statistics, including frequencies and proportions. To visually represent agreement between human- and AI-identified themes (validity), an agreement matrix was constructed. We measured interrater reliability using the Cohen κ coefficient. A separate analysis was performed 10 times on the same interview transcript to assess the reliability of GPT-4’s analysis. Themes identified exclusively by GPT-4 were highlighted with exemplar quotes that best represented each theme. All analyses were performed using R statistical software (version 4.3.1; The R Foundation).</p>
      </sec>
      <sec>
        <title>Ethical Considerations</title>
        <p>The study was approved by the University of California San Francisco (UCSF) institutional review board (IRB; 20-32062), and consent was obtained from all participants. In addition to the original study’s IRB approval, we obtained an exemption from our institution’s IRB for the secondary analysis using GPT-4, as the data were deidentified. Before analysis, all transcripts were reviewed to ensure that they contained no protected health information or identifiable data to maintain participant confidentiality. We used a private instance of GPT-4, known as Versa, which operates independently of OpenAI’s commercial model and does not retain or learn from the data inputted [<xref ref-type="bibr" rid="ref28">28</xref>]. This instance was used to develop our AI qualitative analysis methodology. For subsequent analyses, all data were confirmed to be thoroughly deidentified before using the commercial version of GPT-4.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Study Population</title>
        <p>Participant characteristics are summarized in <xref ref-type="table" rid="table1">Table 1</xref>. Participants’ mean age and BMI were 58.8 (SD 13.9) years and 41.1 (SD 9.4) kg/m<sup>2</sup>, respectively. Most participants were White (17/20, 85%), married (12/20, 60%), heterosexual (19/20, 95%) men residing in the Western region of the United States (10/20, 50%). In total, 55% (11/20) of participants underwent surgical correction of their AABP, with interviews conducted at an average of 497 (SD 666) days after surgery.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Participant demographics and characteristics.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="670"/>
            <col width="300"/>
            <thead>
              <tr valign="top">
                <td colspan="2">Characteristics</td>
                <td>Values</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="2">Age (years), mean (SD)</td>
                <td>58.8 (13.9)</td>
              </tr>
              <tr valign="top">
                <td colspan="2">BMI (kg/m<sup>2</sup>), mean (SD)</td>
                <td>41.1 (9.4)</td>
              </tr>
              <tr valign="top">
                <td colspan="3">
                  <bold>Self-identified race, n (%)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>White</td>
                <td>17 (85)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Black or African American</td>
                <td>1 (5)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Other</td>
                <td>2 (10)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Hispanic or Latin ethnicity</td>
                <td>3 (15)</td>
              </tr>
              <tr valign="top">
                <td colspan="3">
                  <bold>Relationship status, n (%)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Married</td>
                <td>12 (60)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Single</td>
                <td>6 (30)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>In a relationship</td>
                <td>2 (10)</td>
              </tr>
              <tr valign="top">
                <td colspan="3">
                  <bold>Sexual orientation, n (%)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Heterosexual</td>
                <td>19 (95)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Homosexual</td>
                <td>1 (5)</td>
              </tr>
              <tr valign="top">
                <td colspan="3">
                  <bold>Region, n (%)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>West</td>
                <td>10 (50)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Northeast</td>
                <td>7 (35)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Midwest</td>
                <td>2 (10)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>South</td>
                <td>1 (5)</td>
              </tr>
              <tr valign="top">
                <td colspan="3">
                  <bold>Patients who underwent AABP<sup>a</sup> surgical correction (n=11, 55%), n (%)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Escutcheonectomy</td>
                <td>9 (45)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Excision of penile skin with split-thickness skin graft</td>
                <td>6 (30)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Ventral slit scrotal flap</td>
                <td>5 (25)</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>AABP: adult-acquired buried penis.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Qualitative Description</title>
        <p><xref ref-type="table" rid="table2">Table 2</xref> presents a comparative analysis of themes and subthemes identified by human researchers versus GPT-4. “Urinary issues” were common in interviews analyzed by human researchers (19/20, 95%) and GPT-4 (15/20, 75%). Issues with “spray or stream” were a notable subtheme (12/20, 60% humans vs 7/20, 35% GPT-4). “Sexual issues” were prominently coded as well, present in 95% (19/20) of human-analyzed interviews and 80% (16/20) by GPT-4, with “inability to perform intercourse” coded as a subtheme more frequently by human researchers (12/20, 60% vs 6/20, 30%). Humans coded a broader array of sexual function issues, such as “pain with sex or masturbation” (7/20, 35%) and “difficulty with sex or masturbation” (4/20, 20%). “Mental health issues” were similarly recognized by both humans and GPT-4 (11/20, 55%, both), with “depression” more frequently coded by humans compared with GPT-4 (10/20, 50% vs 4/20, 20%, respectively). “Impact on social life” was an additional significant theme, with humans coding “issues using public restrooms” (12/20, 60%), while GPT-4 emphasized “struggles with romantic relationships” (9/20, 45%). Both methods identified “hygiene issues” (14/20, 70% humans vs 13/20, 65% GPT-4), highlighting difficulties in maintaining cleanliness. Human researchers uniquely identified “contributing factors” as a theme in all interviews.</p>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Human researchers versus GPT-4 qualitative analysis.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="470"/>
            <col width="320"/>
            <col width="180"/>
            <thead>
              <tr valign="top">
                <td colspan="2">Themes and subthemes</td>
                <td>Human researchers, n (%)</td>
                <td>GPT-4, n (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="2">
                  <bold>Urinary issues</bold>
                </td>
                <td>19 (95)</td>
                <td>15 (75)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Spray or stream</td>
                <td>12 (60)</td>
                <td>7 (35)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Hovers over toilet</td>
                <td>8 (40)</td>
                <td>—<sup>a</sup></td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Pain with urination</td>
                <td>7 (35)</td>
                <td>3 (15)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>History of urethral stricture disease</td>
                <td>3 (15)</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Incontinence</td>
                <td>3 (15)</td>
                <td>3 (15)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Incomplete bladder emptying</td>
                <td>3 (15)</td>
                <td>2 (10)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Sits to urinate</td>
                <td>2 (10)</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Smelly urine</td>
                <td>1 (5)</td>
                <td>1 (5)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Trouble with catheter</td>
                <td>1 (5)</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Uses shower or tub to urinate</td>
                <td>1 (5)</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Frequent urination</td>
                <td>—</td>
                <td>2 (10)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Getting up at night to urinate</td>
                <td>—</td>
                <td>1 (5)</td>
              </tr>
              <tr valign="top">
                <td colspan="2">
                  <bold>Sex issues</bold>
                </td>
                <td>19 (95)</td>
                <td>16 (80)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Unable to perform intercourse</td>
                <td>12 (60)</td>
                <td>6 (30)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Unable to get erection</td>
                <td>9 (45)</td>
                <td>3 (15)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Pain with sex or masturbation</td>
                <td>7 (35)</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Difficulty with sex or masturbation</td>
                <td>4 (20)</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Painful erection</td>
                <td>4 (20)</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Unable to maintain erection</td>
                <td>3 (15)</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Avoids sex</td>
                <td>2 (10)</td>
                <td>4 (20)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Unable to orgasm</td>
                <td>2 (10)</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Reduced genital sensation</td>
                <td>1 (5)</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Takes longer to orgasm</td>
                <td>1 (5)</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Pain with ejaculation</td>
                <td>1 (5)</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Intercourse not enjoyable</td>
                <td>1 (5)</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Adaptive masturbation techniques</td>
                <td>—</td>
                <td>2 (10)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Poor cosmetic appearance</td>
                <td>—</td>
                <td>2 (10)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Painful erection</td>
                <td>—</td>
                <td>2 (10)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Brittle skin</td>
                <td>—</td>
                <td>1 (5)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Unable to use condom</td>
                <td>—</td>
                <td>1 (5)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Overuse of pornography</td>
                <td>—</td>
                <td>1 (5)</td>
              </tr>
              <tr valign="top">
                <td colspan="2">
                  <bold>Mental health issues</bold>
                </td>
                <td>11 (55)</td>
                <td>11 (55)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Depression</td>
                <td>10 (50)</td>
                <td>6 (30)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Feels like less of a man</td>
                <td>7 (35)</td>
                <td>4 (20)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Anxiety</td>
                <td>4 (20)</td>
                <td>2 (10)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Decreased self-esteem</td>
                <td>3 (15)</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Stress</td>
                <td>1 (5)</td>
                <td>1 (5)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Emotional turmoil</td>
                <td>—</td>
                <td>2 (10)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Loss of confidence</td>
                <td>—</td>
                <td>1 (5)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Guilt</td>
                <td>—</td>
                <td>1 (5)</td>
              </tr>
              <tr valign="top">
                <td colspan="2">
                  <bold>Impacts social life</bold>
                </td>
                <td>16 (80)</td>
                <td>15 (75)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Issues using public restrooms</td>
                <td>12 (60)</td>
                <td>8 (40)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Avoids travel</td>
                <td>6 (30)</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Struggles with romantic relationships</td>
                <td>—</td>
                <td>9 (45)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Mobility impairment</td>
                <td>—</td>
                <td>6 (30)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Spousal support</td>
                <td>—</td>
                <td>3 (15)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Avoids hobbies</td>
                <td>—</td>
                <td>1 (5)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Avoids social activities</td>
                <td>—</td>
                <td>1 (5)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Negative impact on career</td>
                <td>—</td>
                <td>1 (5)</td>
              </tr>
              <tr valign="top">
                <td colspan="2">
                  <bold>Hygiene issues</bold>
                </td>
                <td>14 (70)</td>
                <td>13 (65)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Hard or effort to clean</td>
                <td>11 (55)</td>
                <td>11 (55)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Skin tearing</td>
                <td>7 (35)</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Penile bleeding</td>
                <td>6 (30)</td>
                <td>2 (10)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Infections</td>
                <td>—</td>
                <td>6 (30)</td>
              </tr>
              <tr valign="top">
                <td colspan="2">
                  <bold>Contributing factors</bold>
                </td>
                <td>20 (100)</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Worse after weight gain</td>
                <td>14 (70)</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Worse after multiple surgeries</td>
                <td>8 (40)</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Worse after weight loss</td>
                <td>4 (20)</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Improvement after weight loss</td>
                <td>0 (0)</td>
                <td>—</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table2fn1">
              <p><sup>a</sup>Not applicable.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Validity and Reliability of GPT-4 Analysis</title>
        <p>To further assess the validity of GPT-4 analysis, we generated an agreement matrix comparing themes coded by human researchers and GPT-4 per interview (<xref rid="figure2" ref-type="fig">Figure 2</xref>). There were 63 instances where both human and GPT-4 analyses agreed on the presence of a theme, and 14 instances of agreement on a theme being absent. There was disagreement in 23 cases—16 where humans identified a theme that GPT-4 did not and 7 where GPT-4 identified a theme that humans did not (<xref ref-type="table" rid="table3">Table 3</xref>). The overall Cohen κ coefficient was 0.401, indicating moderate agreement. Boxes depict interview theme analysis. The blue (AI) and yellow (humans) squares indicate presence and green squares reflect agreement on presence or absence.</p>
        <p>We assessed reliability by analyzing the same interview transcript 10 times with the same prompt set (<xref ref-type="table" rid="table4">Table 4</xref>). There was consistent identification of “body image struggles or disfigurement” and “chronic pain and discomfort,” both appearing in all iterations (10/10, 100%). “Depression” was also frequently coded, appearing in 90% (9/10) of analyses. High reliability was observed for “motivated to have surgery,” “uses shower or tub to urinate,” and “weight challenges,” each occurring in 80% (8/10) of the analyses. Other codes such as “issues using public restrooms,” “unable to perform intercourse,” and “negative health care experiences” were present in 70% (7/10) of iterations. Codes for “hard or effort to clean,” “decreased self-esteem,” and “necrotizing fasciitis diagnosis” were identified 60% (6/10) of the time. Codes were less frequent for “urinary tract infections” (3/10, 30%), “sits to urinate” (2/10, 20%), and a cluster of codes that included “dependency on others for care,” “social isolation and loneliness,” “high frequency of urination,” “anxiety,” “loss of physical autonomy,” “financial burden,” and “hematuria,” each appearing once (1/10, 10%).</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Themes identified per interview by GPT-4 versus human researchers.</p>
          </caption>
          <graphic xlink:href="jmir_v26i1e56500_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Codes and exemplar quotes identified exclusively by GPT-4.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="140"/>
            <col width="710"/>
            <col width="150"/>
            <thead>
              <tr valign="top">
                <td>Interview number</td>
                <td>GPT-4 code applied: <break/>exemplar quote</td>
                <td>Theme</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>3</td>
                <td>Impact on marital relationship: “I am married? And you know it’s it is... strained or? I wasn’t meeting her needs.”</td>
                <td>Impacts social life</td>
              </tr>
              <tr valign="top">
                <td>3</td>
                <td>Hygiene management efforts: “I try to keep myself pretty clean... I really tried to wash my genitals really well.”</td>
                <td>Hygiene issues</td>
              </tr>
              <tr valign="top">
                <td>6</td>
                <td>Mental health impact and resilience: “Yes in some ways it did affect me but other ways I don’t really don’t think it did.”</td>
                <td>Mental health issues</td>
              </tr>
              <tr valign="top">
                <td>8</td>
                <td>Mental health and self-image concerns: “the preconceived notion you know but the man’s function is supposed to be.”</td>
                <td>Mental health issues</td>
              </tr>
              <tr valign="top">
                <td>9</td>
                <td>Improved hygiene post surgery: “I actually feel that hygiene became a lot easier simply because I didn’t have to dig my finger in and run around the shaft to try and wash everything out.”</td>
                <td>Hygiene issues</td>
              </tr>
              <tr valign="top">
                <td>16</td>
                <td>Day-to-day discontent and social withdrawal: “It’s just I just I would hate for other candidates that going forward thinking there is nothing that can be done need to be here they need to have options on the table.”</td>
                <td>Impacts social life</td>
              </tr>
              <tr valign="top">
                <td>18</td>
                <td>Urinary dysfunction and social anxiety: “I would say they’re abnormal for somebody my age a lot of times it’s needing the needing to push… And that can cause anxiety in a public sort of restroom atmosphere.”</td>
                <td>Urinary issues</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <table-wrap position="float" id="table4">
          <label>Table 4</label>
          <caption>
            <p>Reliability of GPT-4–generated codes.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="700"/>
            <col width="300"/>
            <thead>
              <tr valign="bottom">
                <td>Code<sup>a</sup></td>
                <td>Analyses, n (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Body image struggles or disfigurement</td>
                <td>10 (100)</td>
              </tr>
              <tr valign="top">
                <td>Chronic pain and discomfort</td>
                <td>10 (100)</td>
              </tr>
              <tr valign="top">
                <td>Depression</td>
                <td>9 (90)</td>
              </tr>
              <tr valign="top">
                <td>Motivated to have surgery</td>
                <td>8 (80)</td>
              </tr>
              <tr valign="top">
                <td>Uses shower or tub to urinate</td>
                <td>8 (80)</td>
              </tr>
              <tr valign="top">
                <td>Weight challenges</td>
                <td>8 (80)</td>
              </tr>
              <tr valign="top">
                <td>Issues using public restrooms</td>
                <td>7 (70)</td>
              </tr>
              <tr valign="top">
                <td>Unable to perform intercourse</td>
                <td>7 (70)</td>
              </tr>
              <tr valign="top">
                <td>Negative health care experiences</td>
                <td>7 (70)</td>
              </tr>
              <tr valign="top">
                <td>Hard or effort to clean</td>
                <td>6 (60)</td>
              </tr>
              <tr valign="top">
                <td>Decreased self-esteem</td>
                <td>6 (60)</td>
              </tr>
              <tr valign="top">
                <td>Necrotizing fasciitis diagnosis</td>
                <td>6 (60)</td>
              </tr>
              <tr valign="top">
                <td>Urinary tract infections</td>
                <td>3 (30)</td>
              </tr>
              <tr valign="top">
                <td>Sits to urinate</td>
                <td>2 (20)</td>
              </tr>
              <tr valign="top">
                <td>Dependency on others for care</td>
                <td>1 (10)</td>
              </tr>
              <tr valign="top">
                <td>Social isolation and loneliness</td>
                <td>1 (10)</td>
              </tr>
              <tr valign="top">
                <td>High frequency of urination</td>
                <td>1 (10)</td>
              </tr>
              <tr valign="top">
                <td>Anxiety</td>
                <td>1 (10)</td>
              </tr>
              <tr valign="top">
                <td>Loss of physical autonomy</td>
                <td>1 (10)</td>
              </tr>
              <tr valign="top">
                <td>Financial burden</td>
                <td>1 (10)</td>
              </tr>
              <tr valign="top">
                <td>Hematuria</td>
                <td>1 (10)</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table4fn1">
              <p><sup>a</sup>Presence of codes from the same interview analyzed 10 times by GPT-4. Each code was counted only once per analysis, indicating whether it was identified (present) or not (absent) during each separate analysis.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Results</title>
        <p>In this investigation, we directly compared the performance of AI (GPT-4) with human researchers in conducting a qualitative analysis of interviews with patients affected by AABP. Our study is the first of its kind, to our knowledge, to perform such a direct comparison, highlighting the potential use of AI in qualitative research. By using generalized prompts, our method allows researchers without specialized NLP knowledge to use GPT-4 for rigorous qualitative analysis, significantly reducing the time investment required.</p>
        <p>Our results showed moderate alignment between GPT-4 and human analyses in identifying key themes, including urinary challenges, sexual health issues, and mental health impacts. Human analysis identified more subthemes, capturing the data’s complexities more thoroughly than GPT-4. This difference may stem from GPT-4’s token size limitations, which restrict its ability to perform comprehensive analyses as the input length increases [<xref ref-type="bibr" rid="ref29">29</xref>]. The reliability tests revealed that while GPT-4 consistently recognized key codes, its identification of subtler codes was more variable. This suggests that implementing repeated analysis cycles, similar to the human multirater approach, could refine AI’s analytical reliability. Overall, our findings underscore a complementary role for AI and human collaboration in qualitative research, where each can augment the strengths of the other.</p>
        <p>The question of how to evaluate the accuracy and reliability of AI-driven analysis is crucial for future research. We adopted a quantitative approach to directly compare the presence of themes and subthemes in both human and AI analyses. By calculating Cohen κ, a statistic that measures interrater reliability by considering the agreement occurring by chance, we provided an objective assessment of the consistency of themes identified by GPT-4 compared with human analysis, presupposing human analysis as the “gold standard.” In addition, to ensure consistency in GPT-4’s outputs, we conducted multiple iterations of the same interview transcript analysis, analogous to traditional qualitative research methods where multiple analysts and iterative coding processes are used to standardize analyses and minimize biases. It is important to note that while these quantitative metrics offer a clear criterion for comparison, they may not fully capture the depth and richness of qualitative insights. GPT-4 has demonstrated the ability to detect subtle nuances and emotional contexts from text data, suggesting that incorporating more qualitative approaches in AI analysis evaluation could enhance the understanding of its analytical capabilities [<xref ref-type="bibr" rid="ref30">30</xref>,<xref ref-type="bibr" rid="ref31">31</xref>].</p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>A primary limitation of this study arises from the comparison phase, where themes and subthemes generated by GPT-4 were aligned with those identified by human researchers. Although a blinded reviewer was used to mitigate potential bias, the subjective nature of qualitative analysis means that a degree of bias is likely to remain. This is a common challenge in qualitative research, where analysts’ subjective interpretations inherently influence their analysis. However, it can be argued that the use of a large language model such as GPT-4 may present a more objective method of analysis compared with the potential variability inherent between different human researchers’ analyses, due to the large language model’s consistent application of its transformer model.</p>
        <p>We deliberately chose qualitative description as our analytic approach, favoring the accuracy to source material over depth of analysis. Qualitative description involves the systematic categorization and interpretation of qualitative data to uncover patterns and insights while staying close to the original data [<xref ref-type="bibr" rid="ref22">22</xref>-<xref ref-type="bibr" rid="ref24">24</xref>]. A more context-based approach, such as thematic analysis, could generate richer themes and subthemes but poses challenges for comparability. More interpretative methods may introduce subjectivity, reducing reproducibility. While our methodological choice ensures that our study remains accessible as a framework for others to build on and develop more interpretative techniques, the need for comparison limited our depth of insights.</p>
        <p>Qualitative methods have inherent limitations, such as potential bias and limited generalizability due to smaller, nonrandom samples, and aim to produce in-depth insights and understanding rather than population inferences [<xref ref-type="bibr" rid="ref32">32</xref>,<xref ref-type="bibr" rid="ref33">33</xref>]. Consequently, our findings may not capture the full diversity of patient experiences, potentially limiting the generalizability of our results. Nevertheless, our study primarily aims to provide a comparative analysis, focusing on GPT-4 as a suitable tool for qualitative research applications.</p>
        <p>As GPT-4 and other large language models advance, their analytical capabilities are expected to become more sophisticated, which may alter their proficiency in qualitative analysis. For example, while GPT-3.5 scored in the bottom 10% on a simulated bar examination, GPT-4 has demonstrated a significant improvement, placing within the top 10% of test takers [<xref ref-type="bibr" rid="ref11">11</xref>]. The study’s findings are therefore a snapshot of GPT-4’s capabilities at a specific point in time and may not fully represent its future potential in qualitative analysis. Despite this limitation, the current trajectory of AI indicates that the use of GPT-4 and similar large language models in qualitative research is likely to become increasingly robust and refined.</p>
      </sec>
      <sec>
        <title>Comparison With Previous Work</title>
        <p>While studies applying GPT-4 or other large language models to qualitative research are limited, a growing body of work has compared the performance of OpenAI’s GPT models, including GPT-3, -3.5, and -4, with that of humans in academic research and medical education [<xref ref-type="bibr" rid="ref12">12</xref>-<xref ref-type="bibr" rid="ref15">15</xref>]. Wang et al [<xref ref-type="bibr" rid="ref34">34</xref>] found that while ChatGPT can generate accurate and relevant information, it is not without gaps when compared with official sources, indicating a need for supplementary validation from reliable references. Other studies have shown that ChatGPT can mimic the style of human-written research abstracts, albeit with limitations in quality and accuracy, as indicated by the ability of blinded reviewers to distinguish AI-generated content [<xref ref-type="bibr" rid="ref35">35</xref>]. In the field of medical education, ChatGPT has been shown to outperform medical students on examinations, suggesting valuable applications in examination preparation [<xref ref-type="bibr" rid="ref36">36</xref>]. Similarly, ChatGPT’s performance on the United States Medical Licensing Examination (USMLE) further showcases the potential use of AI in medical education, where it achieved scores near the passing threshold without specialized training [<xref ref-type="bibr" rid="ref37">37</xref>]. These findings emphasize that while advanced large language models such as GPT-4 are becoming increasingly competent in complex tasks, their current role remains complementary to human expertise.</p>
        <p>The application of GPT-4 and other large language models to health care is a burgeoning field with substantial promise, resting on the fundamental ability of AI to process qualitative data efficiently. In patient care, large language models can enhance communication by translating complex medical language into more accessible terms for health care providers and patients [<xref ref-type="bibr" rid="ref38">38</xref>]. The performance of large language models on medical licensing examinations also indicates their potential use in supporting clinical decision-making [<xref ref-type="bibr" rid="ref39">39</xref>]. In administrative contexts, large language models are particularly valuable for generating concise clinical summaries and synthesizing extensive electronic medical record documentation, tasks that typically consume considerable time for health care professionals. The integration of large language models into administrative workflows may increase efficiency and allow clinicians to allocate more time to direct patient care. Health care companies are already beginning to integrate large language models into electronic health records, as illustrated by Epic’s recent partnership with Microsoft to embed Azure OpenAI service into its own electronic health record systems [<xref ref-type="bibr" rid="ref40">40</xref>].</p>
        <p>Despite its promise, the integration of large language models in health care raises several ethical concerns that warrant careful consideration [<xref ref-type="bibr" rid="ref41">41</xref>]. Foremost among these is data privacy, particularly regarding the handling of sensitive patient information, necessitating robust safeguards against data breaches. The opacity of these models, due to the unavailability of public training data sets and model weights, poses another concern as it obscures the understanding of their decision-making processes and challenges their trustworthiness in clinical applications [<xref ref-type="bibr" rid="ref42">42</xref>]. In addition, the commercialization of large language models by major corporations, such as OpenAI, Microsoft, Meta, and Google, brings into question the potential influence of commercial interests on model development and deployment, possibly overshadowing patient welfare. A crucial concern is the risk of patient harm arising from incorrect or biased models, emphasizing the need for rigorous testing and validation of large language models to ensure their reliability and prevent adverse clinical outcomes [<xref ref-type="bibr" rid="ref43">43</xref>].</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>Our research demonstrates that large language models like GPT-4 can discern key themes from qualitative health care data when used with standardized prompts. This “out of the box” approach aligns moderately well with qualitative description analysis by human analysts. Future work should use more context-based prompts for deeper and richer themes. As this may introduce greater subjectivity, researchers should also explore iterative analyses, such as synthesizing output from multiple iterations, to improve large language model output reliability. In addition, researchers should assess the qualitative analytic abilities of other popular models like Gemini (Google), Llama (Meta), and Claude (Anthropic AI), and develop methods to circumvent the token limitations inherent in models such as GPT-4 by segmenting qualitative data inputs, enriching the depth and breadth of qualitative analyses.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group/>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">AABP</term>
          <def>
            <p>adult-acquired buried penis</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">AI</term>
          <def>
            <p>artificial intelligence</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">BERT</term>
          <def>
            <p>Bidirectional Encoder Representations from Transformers</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">IRB</term>
          <def>
            <p>Institutional Review Board</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">NLP</term>
          <def>
            <p>natural language processing</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">TURNS</term>
          <def>
            <p>Trauma and Urologic Reconstructive Network of Surgeons</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">UCSF</term>
          <def>
            <p>University of California, San Francisco</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">USMLE</term>
          <def>
            <p>United States Medical Licensing Examination</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>ChatGPT was not used in the ideation or writing of this manuscript.</p>
    </ack>
    <fn-group>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Han</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Ma</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Tian</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>He</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>He</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Qiang</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Shen</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Ge</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Summary of ChatGPT-related research and perspective towards the future of large language models</article-title>
          <source>Meta-Radiology</source>
          <year>2023</year>
          <volume>1</volume>
          <issue>2</issue>
          <fpage>100017</fpage>
          <pub-id pub-id-type="doi">10.1016/j.metrad.2023.100017</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Clusmann</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Kolbinger</surname>
              <given-names>FR</given-names>
            </name>
            <name name-style="western">
              <surname>Muti</surname>
              <given-names>HS</given-names>
            </name>
            <name name-style="western">
              <surname>Carrero</surname>
              <given-names>ZI</given-names>
            </name>
            <name name-style="western">
              <surname>Eckardt</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Laleh</surname>
              <given-names>NG</given-names>
            </name>
            <name name-style="western">
              <surname>Löffler</surname>
              <given-names>CML</given-names>
            </name>
            <name name-style="western">
              <surname>Schwarzkopf</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Unger</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Veldhuizen</surname>
              <given-names>GP</given-names>
            </name>
            <name name-style="western">
              <surname>Wagner</surname>
              <given-names>SJ</given-names>
            </name>
            <name name-style="western">
              <surname>Kather</surname>
              <given-names>JN</given-names>
            </name>
          </person-group>
          <article-title>The future landscape of large language models in medicine</article-title>
          <source>Commun Med (Lond)</source>
          <year>2023</year>
          <volume>3</volume>
          <issue>1</issue>
          <fpage>141</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s43856-023-00370-1"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s43856-023-00370-1</pub-id>
          <pub-id pub-id-type="medline">37816837</pub-id>
          <pub-id pub-id-type="pii">10.1038/s43856-023-00370-1</pub-id>
          <pub-id pub-id-type="pmcid">PMC10564921</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Meyer</surname>
              <given-names>JG</given-names>
            </name>
            <name name-style="western">
              <surname>Urbanowicz</surname>
              <given-names>RJ</given-names>
            </name>
            <name name-style="western">
              <surname>Martin</surname>
              <given-names>PCN</given-names>
            </name>
            <name name-style="western">
              <surname>O'Connor</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Peng</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Bright</surname>
              <given-names>TJ</given-names>
            </name>
            <name name-style="western">
              <surname>Tatonetti</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Won</surname>
              <given-names>KJ</given-names>
            </name>
            <name name-style="western">
              <surname>Gonzalez-Hernandez</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Moore</surname>
              <given-names>JH</given-names>
            </name>
          </person-group>
          <article-title>ChatGPT and large language models in academia: opportunities and challenges</article-title>
          <source>BioData Min</source>
          <year>2023</year>
          <volume>16</volume>
          <issue>1</issue>
          <fpage>20</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://biodatamining.biomedcentral.com/articles/10.1186/s13040-023-00339-9"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s13040-023-00339-9</pub-id>
          <pub-id pub-id-type="medline">37443040</pub-id>
          <pub-id pub-id-type="pii">10.1186/s13040-023-00339-9</pub-id>
          <pub-id pub-id-type="pmcid">PMC10339472</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ray</surname>
              <given-names>PP</given-names>
            </name>
          </person-group>
          <article-title>ChatGPT: a comprehensive review on background, applications, key challenges, bias, ethics, limitations and future scope</article-title>
          <source>Internet of Things and Cyber-Physical Systems</source>
          <year>2023</year>
          <volume>3</volume>
          <fpage>121</fpage>
          <lpage>154</lpage>
          <pub-id pub-id-type="doi">10.1016/j.iotcps.2023.04.003</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Schopow</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Osterhoff</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Baur</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Applications of the natural language processing tool ChatGPT in clinical practice: comparative study and augmented systematic review</article-title>
          <source>JMIR Med Inform</source>
          <year>2023</year>
          <volume>11</volume>
          <fpage>e48933</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medinform.jmir.org/2023/1/e48933/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/48933</pub-id>
          <pub-id pub-id-type="medline">38015610</pub-id>
          <pub-id pub-id-type="pii">v11i1e48933</pub-id>
          <pub-id pub-id-type="pmcid">PMC10716749</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Almeida</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Faria</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Queirós</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Strengths and limitations of qualitative and quantitative research methods</article-title>
          <source>Eur J Educ Stud</source>
          <year>2017</year>
          <volume>3</volume>
          <issue>9</issue>
          <fpage>369</fpage>
          <lpage>387</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.researchgate.net/publication/319852576_Strengths_and_Limitations_of_Qualitative_and_Quantitative_Research_Methods"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kantor</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Best practices for implementing ChatGPT, large language models, and artificial intelligence in qualitative and survey-based research</article-title>
          <source>JAAD Int</source>
          <year>2024</year>
          <volume>14</volume>
          <fpage>22</fpage>
          <lpage>23</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S2666-3287(23)00157-8"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jdin.2023.10.001</pub-id>
          <pub-id pub-id-type="medline">38054196</pub-id>
          <pub-id pub-id-type="pii">S2666-3287(23)00157-8</pub-id>
          <pub-id pub-id-type="pmcid">PMC10694559</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hitch</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Artificial intelligence augmented qualitative analysis: the way of the future?</article-title>
          <source>Qual Health Res</source>
          <year>2024</year>
          <volume>34</volume>
          <issue>7</issue>
          <fpage>595</fpage>
          <lpage>606</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://journals.sagepub.com/doi/abs/10.1177/10497323231217392?url_ver=Z39.88-2003&#38;rfr_id=ori:rid:crossref.org&#38;rfr_dat=cr_pub%20%200pubmed"/>
          </comment>
          <pub-id pub-id-type="doi">10.1177/10497323231217392</pub-id>
          <pub-id pub-id-type="medline">38064244</pub-id>
          <pub-id pub-id-type="pmcid">PMC11103925</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lennon</surname>
              <given-names>RP</given-names>
            </name>
            <name name-style="western">
              <surname>Fraleigh</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Van Scoy</surname>
              <given-names>LJ</given-names>
            </name>
            <name name-style="western">
              <surname>Keshaviah</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>XC</given-names>
            </name>
            <name name-style="western">
              <surname>Snyder</surname>
              <given-names>BL</given-names>
            </name>
            <name name-style="western">
              <surname>Miller</surname>
              <given-names>EL</given-names>
            </name>
            <name name-style="western">
              <surname>Calo</surname>
              <given-names>WA</given-names>
            </name>
            <name name-style="western">
              <surname>Zgierska</surname>
              <given-names>AE</given-names>
            </name>
            <name name-style="western">
              <surname>Griffin</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Developing and testing an automated qualitative assistant (AQUA) to support qualitative analysis</article-title>
          <source>Fam Med Community Health</source>
          <year>2021</year>
          <volume>9</volume>
          <issue>Suppl 1</issue>
          <fpage>e001287</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://fmch.bmj.com/lookup/pmidlookup?view=long&#38;pmid=34824135"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/fmch-2021-001287</pub-id>
          <pub-id pub-id-type="medline">34824135</pub-id>
          <pub-id pub-id-type="pii">fmch-2021-001287</pub-id>
          <pub-id pub-id-type="pmcid">PMC8627418</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cheligeer</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Nandi</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Doktorchik</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Quan</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Zeng</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Singh</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Natural language processing (NLP) aided qualitative method in health research</article-title>
          <source>J Integr Des Process Sci</source>
          <year>2023</year>
          <volume>27</volume>
          <issue>1</issue>
          <fpage>41</fpage>
          <lpage>58</lpage>
          <pub-id pub-id-type="doi">10.3233/jid-220013</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <collab>OpenAI</collab>
            <name name-style="western">
              <surname>Achiam</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Adler</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Agarwal</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Ahmad</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Akkaya</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Aleman</surname>
              <given-names>FL</given-names>
            </name>
            <name name-style="western">
              <surname>Almeida</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Altenschmidt</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Altman</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Anadkat</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Avila</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Babuschkin</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Balaji</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Balcom</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>GPT-4 Technical Report</article-title>
          <source>ArXiv</source>
          <comment>Preprint posted online on March 04, 2024</comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://arxiv.org/abs/2303.08774"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Xie</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Lyu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Cai</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Carroll</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Redefining qualitative analysis in the AI era: utilizing ChatGPT for efficient thematic analysis</article-title>
          <source>ArXiv</source>
          <comment>Preprint posted online on May 28, 2024</comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2309.10771"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hamilton</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Elliott</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Quick</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Smith</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Choplin</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>Exploring the use of AI in qualitative analysis: a comparative study of guaranteed income data</article-title>
          <source>Int J Qual Methods</source>
          <year>2023</year>
          <volume>22</volume>
          <fpage>16094069231201504</fpage>
          <pub-id pub-id-type="doi">10.1177/16094069231201504</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Morgan</surname>
              <given-names>DL</given-names>
            </name>
          </person-group>
          <article-title>Exploring the use of artificial intelligence for qualitative data analysis: the case of ChatGPT</article-title>
          <source>Int J Qual Methods</source>
          <year>2023</year>
          <volume>22</volume>
          <pub-id pub-id-type="doi">10.1177/16094069231211248</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wachinger</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Bärnighausen</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Schäfer</surname>
              <given-names>LN</given-names>
            </name>
            <name name-style="western">
              <surname>Scott</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>McMahon</surname>
              <given-names>SA</given-names>
            </name>
          </person-group>
          <article-title>Prompts, pearls, imperfections: comparing ChatGPT and a human researcher in qualitative data analysis</article-title>
          <source>Qual Health Res</source>
          <year>2024</year>
          <fpage>10497323241244669</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://journals.sagepub.com/doi/abs/10.1177/10497323241244669?url_ver=Z39.88-2003&#38;rfr_id=ori:rid:crossref.org&#38;rfr_dat=cr_pub%20%200pubmed"/>
          </comment>
          <pub-id pub-id-type="doi">10.1177/10497323241244669</pub-id>
          <pub-id pub-id-type="medline">38775392</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fuller</surname>
              <given-names>KA</given-names>
            </name>
            <name name-style="western">
              <surname>Morbitzer</surname>
              <given-names>KA</given-names>
            </name>
            <name name-style="western">
              <surname>Zeeman</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Persky</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Savage</surname>
              <given-names>AC</given-names>
            </name>
            <name name-style="western">
              <surname>McLaughlin</surname>
              <given-names>JE</given-names>
            </name>
          </person-group>
          <article-title>Exploring the use of ChatGPT to analyze student course evaluation comments</article-title>
          <source>BMC Med Educ</source>
          <year>2024</year>
          <volume>24</volume>
          <issue>1</issue>
          <fpage>423</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmededuc.biomedcentral.com/articles/10.1186/s12909-024-05316-2"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12909-024-05316-2</pub-id>
          <pub-id pub-id-type="medline">38641798</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12909-024-05316-2</pub-id>
          <pub-id pub-id-type="pmcid">PMC11031883</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Amirova</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Fteropoulli</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Ahmed</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Cowie</surname>
              <given-names>MR</given-names>
            </name>
            <name name-style="western">
              <surname>Leibo</surname>
              <given-names>JZ</given-names>
            </name>
          </person-group>
          <article-title>Framework-based qualitative analysis of free responses of large language models: algorithmic fidelity</article-title>
          <source>PLoS One</source>
          <year>2024</year>
          <volume>19</volume>
          <issue>3</issue>
          <fpage>e0300024</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pone.0300024"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0300024</pub-id>
          <pub-id pub-id-type="medline">38470890</pub-id>
          <pub-id pub-id-type="pii">PONE-D-23-31945</pub-id>
          <pub-id pub-id-type="pmcid">PMC10931535</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Amend</surname>
              <given-names>GM</given-names>
            </name>
            <name name-style="western">
              <surname>Holler</surname>
              <given-names>JT</given-names>
            </name>
            <name name-style="western">
              <surname>Sadighian</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Rios</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Hakam</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Nabavizadeh</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Enriquez</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Shaw</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Vanni</surname>
              <given-names>AJ</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Erickson</surname>
              <given-names>BA</given-names>
            </name>
            <name name-style="western">
              <surname>Buckley</surname>
              <given-names>JC</given-names>
            </name>
            <name name-style="western">
              <surname>Breyer</surname>
              <given-names>BN</given-names>
            </name>
          </person-group>
          <article-title>The lived experience of patients with adult acquired buried penis</article-title>
          <source>J Urol</source>
          <year>2022</year>
          <volume>208</volume>
          <issue>2</issue>
          <fpage>396</fpage>
          <lpage>405</lpage>
          <pub-id pub-id-type="doi">10.1097/JU.0000000000002667</pub-id>
          <pub-id pub-id-type="medline">35767655</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Meskó</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Prompt engineering as an important emerging skill for medical professionals: tutorial</article-title>
          <source>J Med Internet Res</source>
          <year>2023</year>
          <volume>25</volume>
          <fpage>e50638</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2023/1/e50638/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/50638</pub-id>
          <pub-id pub-id-type="medline">37792434</pub-id>
          <pub-id pub-id-type="pii">v25i1e50638</pub-id>
          <pub-id pub-id-type="pmcid">PMC10585440</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="web">
          <source>Otter.ai - AI Meeting Note Taker &#38; Real-time AI Transcription</source>
          <access-date>2024-01-17</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://otter.ai/">https://otter.ai/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="web">
          <article-title>One platform to connect</article-title>
          <source>Zoom</source>
          <access-date>2024-01-17</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://zoom.us/">https://zoom.us/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sandelowski</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Whatever happened to qualitative description?</article-title>
          <source>Res Nurs Health</source>
          <year>2000</year>
          <volume>23</volume>
          <issue>4</issue>
          <fpage>334</fpage>
          <lpage>340</lpage>
          <pub-id pub-id-type="doi">10.1002/1098-240x(200008)23:4&#60;334::aid-nur9&#62;3.0.co;2-g</pub-id>
          <pub-id pub-id-type="medline">10940958</pub-id>
          <pub-id pub-id-type="pii">10.1002/1098-240X(200008)23:4&#60;334::AID-NUR9&#62;3.0.CO;2-G</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sandelowski</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>What's in a name? Qualitative description revisited</article-title>
          <source>Res Nurs Health</source>
          <year>2010</year>
          <volume>33</volume>
          <issue>1</issue>
          <fpage>77</fpage>
          <lpage>84</lpage>
          <pub-id pub-id-type="doi">10.1002/nur.20362</pub-id>
          <pub-id pub-id-type="medline">20014004</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Neergaard</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Olesen</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Andersen</surname>
              <given-names>RS</given-names>
            </name>
            <name name-style="western">
              <surname>Sondergaard</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Qualitative description - the poor cousin of health research?</article-title>
          <source>BMC Med Res Methodol</source>
          <year>2009</year>
          <volume>9</volume>
          <fpage>52</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedresmethodol.biomedcentral.com/articles/10.1186/1471-2288-9-52"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/1471-2288-9-52</pub-id>
          <pub-id pub-id-type="medline">19607668</pub-id>
          <pub-id pub-id-type="pii">1471-2288-9-52</pub-id>
          <pub-id pub-id-type="pmcid">PMC2717117</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zapf</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Castell</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Morawietz</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Karch</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Measuring inter-rater reliability for nominal data - which coefficients and confidence intervals are appropriate?</article-title>
          <source>BMC Med Res Methodol</source>
          <year>2016</year>
          <volume>16</volume>
          <fpage>93</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedresmethodol.biomedcentral.com/articles/10.1186/s12874-016-0200-9"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12874-016-0200-9</pub-id>
          <pub-id pub-id-type="medline">27495131</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12874-016-0200-9</pub-id>
          <pub-id pub-id-type="pmcid">PMC4974794</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="web">
          <source>Thematic analysis with ChatGPT &#124; PART 1- Coding qualitative Data with ChatGPT</source>
          <year>2023</year>
          <month>05</month>
          <day>19</day>
          <access-date>2024-07-25</access-date>
          <publisher-name>Research with Dr Kriukow YouTube page</publisher-name>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.youtube.com/watch?v=8dTs7D42ge0">https://www.youtube.com/watch?v=8dTs7D42ge0</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ahmed</surname>
              <given-names>SK</given-names>
            </name>
          </person-group>
          <article-title>The pillars of trustworthiness in qualitative research</article-title>
          <source>J Med Surg Public Health</source>
          <year>2024</year>
          <volume>2</volume>
          <fpage>100051</fpage>
          <pub-id pub-id-type="doi">10.1016/j.glmedi.2024.100051</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bengfort</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <source>Now Available: Versa, UCSF Generative AI Platform</source>
          <year>2024</year>
          <access-date>2024-07-25</access-date>
          <publisher-name>Office of the Chancellor</publisher-name>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://chancellor.ucsf.edu/news/now-available-versa-ucsf-generative-ai-platform">https://chancellor.ucsf.edu/news/now-available-versa-ucsf-generative-ai-platform</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kohn</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <source>Mastering token limits and memory in ChatGPT and other large language models</source>
          <year>2023</year>
          <access-date>2024-07-25</access-date>
          <publisher-name>Medium</publisher-name>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medium.com/@russkohn/mastering-ai-token-limits-and-memory-ce920630349a">https://medium.com/@russkohn/mastering-ai-token-limits-and-memory-ce920630349a</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Baktash</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Dawodi</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>GPT-4: a review on advancements and opportunities in natural language processing</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online on May 04, 2023</comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2305.03195"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Elyoseph</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Hadar-Shoval</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Asraf</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Lvovsky</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>ChatGPT outperforms humans in emotional awareness evaluations</article-title>
          <source>Front Psychol</source>
          <year>2023</year>
          <volume>14</volume>
          <fpage>1199058</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/37303897"/>
          </comment>
          <pub-id pub-id-type="doi">10.3389/fpsyg.2023.1199058</pub-id>
          <pub-id pub-id-type="medline">37303897</pub-id>
          <pub-id pub-id-type="pmcid">PMC10254409</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Borgstede</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Scholz</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Quantitative and qualitative approaches to generalization and replication—a representationalist view</article-title>
          <source>Front Psychol</source>
          <year>2021</year>
          <volume>12</volume>
          <fpage>605191</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/33613387"/>
          </comment>
          <pub-id pub-id-type="doi">10.3389/fpsyg.2021.605191</pub-id>
          <pub-id pub-id-type="medline">33613387</pub-id>
          <pub-id pub-id-type="pmcid">PMC7892774</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tenny</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Brannan</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Brannan</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Qualitative Study</article-title>
          <source>StatPearls [Internet]</source>
          <access-date>2024-07-06</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.ncbi.nlm.nih.gov/books/NBK470395/">https://www.ncbi.nlm.nih.gov/books/NBK470395/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Gao</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Guo</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Potential and limitations of ChatGPT 3.5 and 4.0 as a source of COVID-19 information: comprehensive comparative analysis of generative and authoritative information</article-title>
          <source>J Med Internet Res</source>
          <year>2023</year>
          <volume>25</volume>
          <fpage>e49771</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2023/1/e49771/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/49771</pub-id>
          <pub-id pub-id-type="medline">38096014</pub-id>
          <pub-id pub-id-type="pii">v25i1e49771</pub-id>
          <pub-id pub-id-type="pmcid">PMC10755661</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cheng</surname>
              <given-names>SL</given-names>
            </name>
            <name name-style="western">
              <surname>Tsai</surname>
              <given-names>SJ</given-names>
            </name>
            <name name-style="western">
              <surname>Bai</surname>
              <given-names>YM</given-names>
            </name>
            <name name-style="western">
              <surname>Ko</surname>
              <given-names>CH</given-names>
            </name>
            <name name-style="western">
              <surname>Hsu</surname>
              <given-names>CW</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>FC</given-names>
            </name>
            <name name-style="western">
              <surname>Tsai</surname>
              <given-names>CK</given-names>
            </name>
            <name name-style="western">
              <surname>Tu</surname>
              <given-names>YK</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>SN</given-names>
            </name>
            <name name-style="western">
              <surname>Tseng</surname>
              <given-names>PT</given-names>
            </name>
            <name name-style="western">
              <surname>Hsu</surname>
              <given-names>TW</given-names>
            </name>
            <name name-style="western">
              <surname>Liang</surname>
              <given-names>CH</given-names>
            </name>
            <name name-style="western">
              <surname>Su</surname>
              <given-names>KP</given-names>
            </name>
          </person-group>
          <article-title>Comparisons of quality, correctness, and similarity between ChatGPT-generated and human-written abstracts for basic research: cross-sectional study</article-title>
          <source>J Med Internet Res</source>
          <year>2023</year>
          <volume>25</volume>
          <fpage>e51229</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2023/1/e51229/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/51229</pub-id>
          <pub-id pub-id-type="medline">38145486</pub-id>
          <pub-id pub-id-type="pii">v25i1e51229</pub-id>
          <pub-id pub-id-type="pmcid">PMC10760418</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Roos</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Kasapovic</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Jansen</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Kaczmarczyk</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Artificial intelligence in medical education: comparative analysis of ChatGPT, Bing, and medical students in Germany</article-title>
          <source>JMIR Med Educ</source>
          <year>2023</year>
          <volume>9</volume>
          <fpage>e46482</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://mededu.jmir.org/2023/1/e46482/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/46482</pub-id>
          <pub-id pub-id-type="medline">37665620</pub-id>
          <pub-id pub-id-type="pii">v9i1e46482</pub-id>
          <pub-id pub-id-type="pmcid">PMC10507517</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kung</surname>
              <given-names>TH</given-names>
            </name>
            <name name-style="western">
              <surname>Cheatham</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Medenilla</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Sillos</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>De Leon</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Elepaño</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Madriaga</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Aggabao</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Diaz-Candido</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Maningo</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Tseng</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>Performance of ChatGPT on USMLE: potential for AI-assisted medical education using large language models</article-title>
          <source>PLOS Digit Health</source>
          <year>2023</year>
          <volume>2</volume>
          <issue>2</issue>
          <fpage>e0000198</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/36812645"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pdig.0000198</pub-id>
          <pub-id pub-id-type="medline">36812645</pub-id>
          <pub-id pub-id-type="pii">PDIG-D-22-00371</pub-id>
          <pub-id pub-id-type="pmcid">PMC9931230</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Decker</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Trang</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Ramirez</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Colley</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Pierce</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Coleman</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Bongiovanni</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Melton</surname>
              <given-names>GB</given-names>
            </name>
            <name name-style="western">
              <surname>Wick</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Large language model-based chatbot vs surgeon-generated informed consent documentation for common procedures</article-title>
          <source>JAMA Netw Open</source>
          <year>2023</year>
          <volume>6</volume>
          <issue>10</issue>
          <fpage>e2336997</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/37812419"/>
          </comment>
          <pub-id pub-id-type="doi">10.1001/jamanetworkopen.2023.36997</pub-id>
          <pub-id pub-id-type="medline">37812419</pub-id>
          <pub-id pub-id-type="pii">2810364</pub-id>
          <pub-id pub-id-type="pmcid">PMC10562939</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Benary</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>XD</given-names>
            </name>
            <name name-style="western">
              <surname>Schmidt</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Soll</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Hilfenhaus</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Nassir</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Sigler</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Knödler</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Keller</surname>
              <given-names>U</given-names>
            </name>
            <name name-style="western">
              <surname>Beule</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Keilholz</surname>
              <given-names>U</given-names>
            </name>
            <name name-style="western">
              <surname>Leser</surname>
              <given-names>U</given-names>
            </name>
            <name name-style="western">
              <surname>Rieke</surname>
              <given-names>DT</given-names>
            </name>
          </person-group>
          <article-title>Leveraging large language models for decision support in personalized oncology</article-title>
          <source>JAMA Netw Open</source>
          <year>2023</year>
          <volume>6</volume>
          <issue>11</issue>
          <fpage>e2343689</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/37976064"/>
          </comment>
          <pub-id pub-id-type="doi">10.1001/jamanetworkopen.2023.43689</pub-id>
          <pub-id pub-id-type="medline">37976064</pub-id>
          <pub-id pub-id-type="pii">2812097</pub-id>
          <pub-id pub-id-type="pmcid">PMC10656647</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Redmond</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Verona</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <source>Microsoft and Epic expand strategic collaboration with integration of Azure OpenAI Service</source>
          <year>2023</year>
          <month>04</month>
          <day>17</day>
          <access-date>2024-07-25</access-date>
          <publisher-name>Microsoft News Center</publisher-name>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://news.microsoft.com/2023/04/17/microsoft-and-epic-expand-strategic-collaboration-with-integration-of-azure-openai-service/">https://news.microsoft.com/2023/04/17/microsoft-and-epic-expand-strategic-collaboration-with-integration-of-azure-openai-service/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Meskó</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Topol</surname>
              <given-names>EJ</given-names>
            </name>
          </person-group>
          <article-title>The imperative for regulatory oversight of large language models (or generative AI) in healthcare</article-title>
          <source>NPJ Digit Med</source>
          <year>2023</year>
          <volume>6</volume>
          <issue>1</issue>
          <fpage>120</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41746-023-00873-0"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41746-023-00873-0</pub-id>
          <pub-id pub-id-type="medline">37414860</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41746-023-00873-0</pub-id>
          <pub-id pub-id-type="pmcid">PMC10326069</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sanderson</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>GPT-4 is here: what scientists think</article-title>
          <source>Nature</source>
          <year>2023</year>
          <volume>615</volume>
          <issue>7954</issue>
          <fpage>773</fpage>
          <pub-id pub-id-type="doi">10.1038/d41586-023-00816-5</pub-id>
          <pub-id pub-id-type="medline">36928404</pub-id>
          <pub-id pub-id-type="pii">10.1038/d41586-023-00816-5</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zack</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Lehman</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Suzgun</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Rodriguez</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Celi</surname>
              <given-names>LA</given-names>
            </name>
            <name name-style="western">
              <surname>Gichoya</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Jurafsky</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Szolovits</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Bates</surname>
              <given-names>DW</given-names>
            </name>
            <name name-style="western">
              <surname>Abdulnour</surname>
              <given-names>RE</given-names>
            </name>
            <name name-style="western">
              <surname>Butte</surname>
              <given-names>AJ</given-names>
            </name>
            <name name-style="western">
              <surname>Alsentzer</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Assessing the potential of GPT-4 to perpetuate racial and gender biases in health care: a model evaluation study</article-title>
          <source>Lancet Digit Health</source>
          <year>2024</year>
          <volume>6</volume>
          <issue>1</issue>
          <fpage>e12</fpage>
          <lpage>e22</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S2589-7500(23)00225-X"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/S2589-7500(23)00225-X</pub-id>
          <pub-id pub-id-type="medline">38123252</pub-id>
          <pub-id pub-id-type="pii">S2589-7500(23)00225-X</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
