<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="letter" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMIR</journal-id>
      <journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id>
      <journal-title>Journal of Medical Internet Research</journal-title>
      <issn pub-type="epub">1438-8871</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v26i1e52499</article-id>
      <article-id pub-id-type="pmid">38696245</article-id>
      <article-id pub-id-type="doi">10.2196/52499</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Research Letter</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Research Letter</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Using Large Language Models to Support Content Analysis: A Case Study of ChatGPT for Adverse Event Detection</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Jin</surname>
            <given-names>Qiao</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Li</surname>
            <given-names>Yiming</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Wang</surname>
            <given-names>Tongnian</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Zhu</surname>
            <given-names>Lingxuan</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Khosla</surname>
            <given-names>Archit</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Leas</surname>
            <given-names>Eric C</given-names>
          </name>
          <degrees>MPH, PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Herbert Wertheim School of Public Health and Human Longevity Science</institution>
            <institution>University of California San Diego</institution>
            <addr-line>9500 Gilman Drive</addr-line>
            <addr-line>Mail Code: 0725</addr-line>
            <addr-line>La Jolla, CA, 92093</addr-line>
            <country>United States</country>
            <phone>1 951 346 9131</phone>
            <email>ecleas@ucsd.edu</email>
          </address>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-9221-0336</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Ayers</surname>
            <given-names>John W</given-names>
          </name>
          <degrees>MA, PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <xref rid="aff3" ref-type="aff">3</xref>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-8831-8691</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Desai</surname>
            <given-names>Nimit</given-names>
          </name>
          <degrees>BS</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-4628-964X</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Dredze</surname>
            <given-names>Mark</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff5" ref-type="aff">5</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-0422-2474</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Hogarth</surname>
            <given-names>Michael</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff4" ref-type="aff">4</xref>
          <xref rid="aff6" ref-type="aff">6</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-4264-1258</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author">
          <name name-style="western">
            <surname>Smith</surname>
            <given-names>Davey M</given-names>
          </name>
          <degrees>MAS, MD</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-3603-1733</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Herbert Wertheim School of Public Health and Human Longevity Science</institution>
        <institution>University of California San Diego</institution>
        <addr-line>La Jolla, CA</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Qualcomm Institute</institution>
        <institution>University of California San Diego</institution>
        <addr-line>La Jolla, CA</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Division of Infectious Diseases and Global Public Health</institution>
        <institution>Department of Medicine</institution>
        <institution>University of California San Diego</institution>
        <addr-line>La Jolla, CA</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff4">
        <label>4</label>
        <institution>Altman Clinical Translational Research Institute</institution>
        <institution>University of California San Diego</institution>
        <addr-line>La Jolla, CA</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff5">
        <label>5</label>
        <institution>Department of Computer Science</institution>
        <institution>Johns Hopkins University</institution>
        <addr-line>Baltimore, MD</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff6">
        <label>6</label>
        <institution>Department of Biomedical Informatics</institution>
        <institution>University of California San Diego</institution>
        <addr-line>La Jolla, CA</addr-line>
        <country>United States</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Eric C Leas <email>ecleas@ucsd.edu</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <year>2024</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>2</day>
        <month>5</month>
        <year>2024</year>
      </pub-date>
      <volume>26</volume>
      <elocation-id>e52499</elocation-id>
      <history>
        <date date-type="received">
          <day>6</day>
          <month>9</month>
          <year>2023</year>
        </date>
        <date date-type="rev-request">
          <day>10</day>
          <month>3</month>
          <year>2024</year>
        </date>
        <date date-type="rev-recd">
          <day>14</day>
          <month>3</month>
          <year>2024</year>
        </date>
        <date date-type="accepted">
          <day>28</day>
          <month>3</month>
          <year>2024</year>
        </date>
      </history>
      <copyright-statement>©Eric C Leas, John W Ayers, Nimit Desai, Mark Dredze, Michael Hogarth, Davey M Smith. Originally published in the Journal of Medical Internet Research (https://www.jmir.org), 02.05.2024.</copyright-statement>
      <copyright-year>2024</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research, is properly cited. The complete bibliographic information, a link to the original publication on https://www.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://www.jmir.org/2024/1/e52499" xlink:type="simple"/>
      <abstract>
        <p>This study explores the potential of using large language models to assist content analysis by conducting a case study to identify adverse events (AEs) in social media posts. The case study compares ChatGPT’s performance with human annotators’ in detecting AEs associated with delta-8-tetrahydrocannabinol, a cannabis-derived product. Using the identical instructions given to human annotators, ChatGPT closely approximated human results, with a high degree of agreement noted: 94.4% (9436/10,000) for any AE detection (Fleiss κ=0.95) and 99.3% (9931/10,000) for serious AEs (κ=0.96). These findings suggest that ChatGPT has the potential to replicate human annotation accurately and efficiently. The study recognizes possible limitations, including concerns about the generalizability due to ChatGPT’s training data, and prompts further research with different models, data sources, and content analysis tasks. The study highlights the promise of large language models for enhancing the efficiency of biomedical research.</p>
      </abstract>
      <kwd-group>
        <kwd>adverse events</kwd>
        <kwd>artificial intelligence</kwd>
        <kwd>AI</kwd>
        <kwd>text analysis</kwd>
        <kwd>annotation</kwd>
        <kwd>ChatGPT</kwd>
        <kwd>LLM</kwd>
        <kwd>large language model</kwd>
        <kwd>cannabis</kwd>
        <kwd>delta-8-THC</kwd>
        <kwd>delta-8-tetrahydrocannabinol</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>Biomedical text analysis is commonly burdened by the need for manual data review and annotation, which is costly and time-consuming. Artificial intelligence (AI) tools, including large language models (LLMs) such as ChatGPT (OpenAI) [<xref ref-type="bibr" rid="ref1">1</xref>], could reduce this burden by allowing scientists to leverage vast amounts of text data (including medical records and public data) with short written prompts as annotation instructions [<xref ref-type="bibr" rid="ref2">2</xref>]. To explore the potential for AI-assisted annotation, we evaluated whether ChatGPT could replicate human identification of adverse events (AEs) about a cannabis-derived product (delta-8-tetrahydrocannabinol) reported in social media posts [<xref ref-type="bibr" rid="ref3">3</xref>]. AE detection requires reviewing a large amount of unstructured text data to flag a tiny fraction of AE reports, making it an ideal application for AI-assisted annotation [<xref ref-type="bibr" rid="ref4">4</xref>].</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Overview</title>
        <p>To reduce selective reporting bias, we replicated a peer-reviewed publication, wherein human annotators identified AEs in 10,000 randomly sampled, publicly available posts from a delta-8-tetrahydrocannabinol social media forum (Reddit’s r/delta8) [<xref ref-type="bibr" rid="ref3">3</xref>]. Human annotators identified potential AE reports (yes or no) and whether the AE was serious according to 6 Food and Drug Administration MedWatch categories (eg, hospitalization) [<xref ref-type="bibr" rid="ref5">5</xref>].</p>
        <p>ChatGPT (gpt-3.5-turbo-0613) was set to the default settings (<italic>Temperature</italic>=1, <italic>Top P</italic>=1, <italic>Max token limit</italic>=1700, <italic>Frequency Penalty</italic>=0, and <italic>Presence Penalty</italic>=0); given each Reddit post; and asked to reference annotation instructions identical to those given to human annotators, except for a minor modification for result formatting (ie, requested codes in a comma-delimited format; <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). Since ChatGPT was treated as an additional annotator, we compared ChatGPT’s responses with human annotations using the traditional method for assessing interrater reliability rather than statistics for assessing classifiers (eg, <italic>F</italic><sub>1</sub>-score). Thus, we calculated absolute agreement and prevalence- and bias-adjusted Fleiss κ statistics for any AEs, serious AEs, and each MedWatch category of serious AEs [<xref ref-type="bibr" rid="ref6">6</xref>]. Analyses were computed with R statistical software (version 4.3.1; R Core Team).</p>
      </sec>
      <sec>
        <title>Ethical Considerations</title>
        <p>This study was exempted by the University of California San Diego’s human research protection program because the data were public and nonidentifiable (45 CFR §46).</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <p>ChatGPT returned misformatted responses (eg, including the text “adverse event” instead of the requested “0” or “1”) in 35 (0.35%) of 10,000 instances. All misformatted responses were interpretable and resolved through normal data-cleaning methods (eg, rule matching). Example posts along with their labels are shown in <xref ref-type="table" rid="table1">Table 1</xref>. ChatGPT and human annotators agreed on 94.4% (9436/10,000) of labels for any AEs (κ=0.95) and 99.3% (9931/10,000) of labels for any serious AEs (κ=0.96; <xref ref-type="table" rid="table2">Table 2</xref>). For serious AEs, the lowest agreement was 99.4% (9939/10,000) for “other” serious (but undefined) outcomes (κ=0.98). All specifically defined outcomes (eg, hospitalization) achieved 99.9% (≥9986/10,000) agreement (κ=0.99).</p>
      <table-wrap position="float" id="table1">
        <label>Table 1</label>
        <caption>
          <p>Example of posts to the Reddit community r/delta8 and the corresponding categorizations.</p>
        </caption>
        <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
          <col width="770"/>
          <col width="230"/>
          <thead>
            <tr valign="bottom">
              <td>Title and text</td>
              <td>Labels<sup>a</sup></td>
            </tr>
          </thead>
          <tbody>
            <tr valign="top">
              <td>Had to be rushed to the ER after eating an edible. Last week me and my boyfriend bought delta 8 edibles from a vape shop. We were bored and decided it would be a good idea to test it out, we ate two (approximately .1 gram in total). Just a side note, this is was not my first time eating an edible so I didn't really think much of it. It took about 40 minutes for the edible to kick in, at first I just felt very heavy and It was super hard to move, so I laid down for about an hour. Eventually I got bored of laying down and got up to go shower...bad decision. According to my boyfriend, when I got up I fainted. I remember waking up to him freaking tf out, it was very hard to breathe, and it felt like my heart was going to burst. They rushed me to the ER because I was barely able to stay conscious. I had a phycotic break, I thought I was dead, kept hearing all kinds of noises, and I completely lost touch with reality. My heart rate was over 165, I also have a heart condition so they had to keep an eye on that too. It was the most terrifying and traumatizing experience, and I'm still not over it yet. Has anyone gone through this before?</td>
              <td>Identified as an adverse event report and considered serious with the following outcomes: life-threatening, hospitalization, and other serious adverse event</td>
            </tr>
            <tr valign="top">
              <td>Help I feel hungover from delta 8. I feel so awful and can't stop puking. I took 10 mg last night and still feel horrible today. Any advice?</td>
              <td>Identified as an adverse event report, but not considered serious</td>
            </tr>
            <tr valign="top">
              <td>Battery Question. Can someone please recommend and ideal wattage/voltage to use the [BRAND] with? I only have variable wattage/voltage batteries for nicotine vaping and am unfamiliar with batteries used for oils. I’m assuming the former type should work fine as long as I have them set low enough? Any help is appreciated. Thanks</td>
              <td>Not identified as an adverse event report</td>
            </tr>
          </tbody>
        </table>
        <table-wrap-foot>
          <fn id="table1fn1">
            <p><sup>a</sup>Serious adverse events were defined using the Food and Drug Administration MedWatch health outcome categories, which include life-threatening; hospitalization; disability or permanent damage; congenital anomaly or birth defect; required intervention to prevent permanent impairment; or other serious event.</p>
          </fn>
        </table-wrap-foot>
      </table-wrap>
      <table-wrap position="float" id="table2">
        <label>Table 2</label>
        <caption>
          <p>Accuracy of ChatGPT in replicating human identification of adverse events in r/delta8 posts (N=10,000) and the categorization of adverse events to the Food and Drug Administration MedWatch outcome categories.</p>
        </caption>
        <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
          <col width="30"/>
          <col width="170"/>
          <col width="200"/>
          <col width="200"/>
          <col width="0"/>
          <col width="0"/>
          <col width="200"/>
          <col width="0"/>
          <col width="0"/>
          <col width="200"/>
          <thead>
            <tr valign="bottom">
              <td colspan="2">MedWatch categories and ChatGPT response</td>
              <td colspan="3">Human annotation</td>
              <td colspan="3">Agreement, n (%)</td>
              <td colspan="2">κ statistic<sup>a</sup></td>
            </tr>
          </thead>
          <tbody>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>
                <break/>
              </td>
              <td>Yes, n</td>
              <td>No, n</td>
              <td colspan="3">
                <break/>
              </td>
              <td colspan="3">
                <break/>
              </td>
            </tr>
            <tr valign="top">
              <td colspan="6">
                <bold>Labeled as an adverse event report</bold>
              </td>
              <td colspan="3">9436 (94.4)</td>
              <td>0.95</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Yes</td>
              <td>172</td>
              <td>401</td>
              <td colspan="3">
                <break/>
              </td>
              <td colspan="3">
                <break/>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>No</td>
              <td>163</td>
              <td>9264</td>
              <td colspan="3">
                <break/>
              </td>
              <td colspan="3">
                <break/>
              </td>
            </tr>
            <tr valign="top">
              <td colspan="6">
                <bold>Labeled as a serious adverse event report<sup>b</sup></bold>
              </td>
              <td colspan="3">9931 (99.3)</td>
              <td>0.96</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Yes</td>
              <td>15</td>
              <td>17</td>
              <td colspan="3">
                <break/>
              </td>
              <td colspan="3">
                <break/>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>No</td>
              <td>52</td>
              <td>9916</td>
              <td colspan="3">
                <break/>
              </td>
              <td colspan="3">
                <break/>
              </td>
            </tr>
            <tr valign="top">
              <td colspan="6">
                <bold>Life-threatening</bold>
              </td>
              <td colspan="3">9995 (99.9)</td>
              <td>0.99</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Yes</td>
              <td>1</td>
              <td>5</td>
              <td colspan="3">
                <break/>
              </td>
              <td colspan="3">
                <break/>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>No</td>
              <td>0</td>
              <td>9994</td>
              <td colspan="3">
                <break/>
              </td>
              <td colspan="3">
                <break/>
              </td>
            </tr>
            <tr valign="top">
              <td colspan="6">
                <bold>Hospitalization</bold>
              </td>
              <td colspan="3">9993 (99.9)</td>
              <td>0.99</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Yes</td>
              <td>5</td>
              <td>6</td>
              <td colspan="3">
                <break/>
              </td>
              <td colspan="3">
                <break/>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>No</td>
              <td>1</td>
              <td>9988</td>
              <td colspan="3">
                <break/>
              </td>
              <td colspan="3">
                <break/>
              </td>
            </tr>
            <tr valign="top">
              <td colspan="6">
                <bold>Disability or permanent damage</bold>
              </td>
              <td colspan="3">9998 (99.9)</td>
              <td>N/A<sup>c</sup></td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Yes</td>
              <td>0</td>
              <td>2</td>
              <td colspan="3">
                <break/>
              </td>
              <td colspan="3">
                <break/>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>No</td>
              <td>0</td>
              <td>9998</td>
              <td colspan="3">
                <break/>
              </td>
              <td colspan="3">
                <break/>
              </td>
            </tr>
            <tr valign="top">
              <td colspan="6">
                <bold>Congenital anomaly or birth defect</bold>
              </td>
              <td colspan="3">9999 (99.9)</td>
              <td>N/A</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Yes</td>
              <td>0</td>
              <td>1</td>
              <td colspan="3">
                <break/>
              </td>
              <td colspan="3">
                <break/>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>No</td>
              <td>0</td>
              <td>9999</td>
              <td colspan="3">
                <break/>
              </td>
              <td colspan="3">
                <break/>
              </td>
            </tr>
            <tr valign="top">
              <td colspan="6">
                <bold>Required intervention to prevent permanent impairment or damage</bold>
              </td>
              <td colspan="3">9986 (99.9)</td>
              <td>0.99</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Yes</td>
              <td>0</td>
              <td>2</td>
              <td colspan="3">
                <break/>
              </td>
              <td colspan="3">
                <break/>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>No</td>
              <td>12</td>
              <td>9986</td>
              <td colspan="3">
                <break/>
              </td>
              <td colspan="3">
                <break/>
              </td>
            </tr>
            <tr valign="top">
              <td colspan="6">
                <bold>Other serious or important medical events</bold>
              </td>
              <td colspan="3">9939 (99.4)</td>
              <td>0.98</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Yes</td>
              <td>7</td>
              <td>13</td>
              <td colspan="3">
                <break/>
              </td>
              <td colspan="3">
                <break/>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>No</td>
              <td>48</td>
              <td>9932</td>
              <td colspan="3">
                <break/>
              </td>
              <td colspan="3">
                <break/>
              </td>
            </tr>
          </tbody>
        </table>
        <table-wrap-foot>
          <fn id="table2fn1">
            <p><sup>a</sup>Prevalence- and bias-adjusted Fleiss κ.</p>
          </fn>
          <fn id="table2fn2">
            <p><sup>b</sup>A composite of any of the 6 adverse event outcomes.</p>
          </fn>
          <fn id="table2fn3">
            <p><sup>c</sup>N/A: not applicable (κ could not be calculated due to no events being found by human annotators).</p>
          </fn>
        </table-wrap-foot>
      </table-wrap>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <p>ChatGPT demonstrated near-perfect replication of human-identified AEs in social media posts using the exact instructions that guided human annotators. Despite significant resource allocation, automating AE detection has seen limited success. Many studies (eg, social media studies) often omit performance metrics such as agreement with ground truth altogether [<xref ref-type="bibr" rid="ref7">7</xref>]. The LLM and prompt used outperformed the best-performing specialized software for detecting AEs from text data (agreement=94.5%; κ=0.89), which relied on structured and human-curated electronic discharge summaries [<xref ref-type="bibr" rid="ref8">8</xref>].</p>
      <p>We note a few limitations. First, we did not have any measures from the replicated study to estimate time or cost savings attributable to using an LLM. However, these savings would be considerable. If a human annotated 1 post/min, the replicated study’s estimated completion time would be 166.6 hours (10,000 posts ÷ 60 posts/h), or 20.8 workdays. Conversely, assuming ChatGPT annotated a post in 2 seconds [<xref ref-type="bibr" rid="ref9">9</xref>], it would take 5.6 hours with no human effort. Second, the social media data analyzed may be included in ChatGPT’s underlying training data, potentially inflating the accuracy reported herein and reducing generalizability. Third, our goal was to replicate human annotation using the exact codebook that trained human annotators and default settings of ChatGPT-3.5-turbo. Although this alone showed promise, further improvements to the prompt, different models (GPT-4 or Llama-2), or alternative model parameter specifications may improve the accuracy. Finally, we only assessed 1 application of an LLM for biomedical text analysis; inaccuracy and label bias may exist in other settings. Further research is needed to capture process outcomes (eg, time savings), apply LLMs to traditional biomedical data (eg, health records), and address more complex methods of annotation (eg, open coding).</p>
      <p>While acknowledging its limitations, this case study demonstrates the potential for AI to assist researchers in text analysis. Given the demand for annotations in biomedical research and the inherent time and cost constraints, adopting LLM-powered tools could expedite the research process and consequently scientific discovery.</p>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Prompt used to train ChatGPT.</p>
        <media xlink:href="jmir_v26i1e52499_app1.docx" xlink:title="DOCX File, 15 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">AE</term>
          <def>
            <p>adverse event</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">AI</term>
          <def>
            <p>artificial intelligence</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">LLM</term>
          <def>
            <p>large language model</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>This work was funded by grant K01DA054303 from the National Institute on Drug Abuse, the Burroughs Wellcome Fund, and the National Institutes of Health (UL1TR001442). The study sponsors took no part in the study design; collection, analysis, and interpretation of data; the writing of the manuscript; or the decision to submit the manuscript for publication.</p>
    </ack>
    <notes>
      <sec>
        <title>Data Availability</title>
        <p>The corresponding data for the study are available on the first author’s website [<xref ref-type="bibr" rid="ref10">10</xref>].</p>
      </sec>
    </notes>
    <fn-group>
      <fn fn-type="conflict">
        <p>ECL has received consulting fees from Good Analytics. JWA owns equity in Health Watcher and Good Analytics. ND has received consulting fees from Pearl Health. MD owns equity in Good Analytics and receives consulting fees from Bloomberg LP. MH advised LifeLink, a company that developed a health care chatbot, between 2016 and 2020, and maintains an equity position in the company. DMS reports paid consulting for Bayer, Arena Pharmaceuticals, Evidera, FluxErgy, Model Medicines, and Linear Therapies.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="web">
          <article-title>ChatGPT</article-title>
          <source>OpenAI</source>
          <access-date>2024-04-25</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://chat.openai.com/">https://chat.openai.com/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Goldberg</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Kohane</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <source>The AI Revolution in Medicine: GPT-4 and Beyond</source>
          <year>2023</year>
          <publisher-loc>London, UK</publisher-loc>
          <publisher-name>Pearson</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Leas</surname>
              <given-names>EC</given-names>
            </name>
            <name name-style="western">
              <surname>Harati</surname>
              <given-names>RM</given-names>
            </name>
            <name name-style="western">
              <surname>Satybaldiyeva</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Morales</surname>
              <given-names>NE</given-names>
            </name>
            <name name-style="western">
              <surname>Huffaker</surname>
              <given-names>SL</given-names>
            </name>
            <name name-style="western">
              <surname>Mejorado</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Grant</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>Self-reported adverse events associated with ∆8-tetrahydrocannabinol (delta-8-THC) use</article-title>
          <source>J Cannabis Res</source>
          <year>2023</year>
          <month>05</month>
          <day>23</day>
          <volume>5</volume>
          <issue>1</issue>
          <fpage>15</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://jcannabisresearch.biomedcentral.com/articles/10.1186/s42238-023-00191-y"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s42238-023-00191-y</pub-id>
          <pub-id pub-id-type="medline">37217977</pub-id>
          <pub-id pub-id-type="pii">10.1186/s42238-023-00191-y</pub-id>
          <pub-id pub-id-type="pmcid">PMC10204335</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sarker</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Ginn</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Nikfarjam</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>O'Connor</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Smith</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Jayaraman</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Upadhaya</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Gonzalez</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Utilizing social media data for pharmacovigilance: a review</article-title>
          <source>J Biomed Inform</source>
          <year>2015</year>
          <month>04</month>
          <volume>54</volume>
          <fpage>202</fpage>
          <lpage>212</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(15)00036-2"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2015.02.004</pub-id>
          <pub-id pub-id-type="medline">25720841</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(15)00036-2</pub-id>
          <pub-id pub-id-type="pmcid">PMC4408239</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="web">
          <article-title>MedWatch: The FDA Safety Information and Adverse Event Reporting Program</article-title>
          <source>US Food and Drug Administration</source>
          <year>2022</year>
          <month>09</month>
          <day>15</day>
          <access-date>2023-01-03</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.fda.gov/safety/medwatch-fda-safety-information-and-adverse-event-reporting-program">https://www.fda.gov/safety/medwatch-fda-safety-information-and-adverse-event-reporting-program</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Byrt</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Bishop</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Carlin</surname>
              <given-names>JB</given-names>
            </name>
          </person-group>
          <article-title>Bias, prevalence and kappa</article-title>
          <source>J Clin Epidemiol</source>
          <year>1993</year>
          <month>05</month>
          <volume>46</volume>
          <issue>5</issue>
          <fpage>423</fpage>
          <lpage>429</lpage>
          <pub-id pub-id-type="doi">10.1016/0895-4356(93)90018-V</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pierce</surname>
              <given-names>CE</given-names>
            </name>
            <name name-style="western">
              <surname>Bouri</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Pamer</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Proestel</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Rodriguez</surname>
              <given-names>HW</given-names>
            </name>
            <name name-style="western">
              <surname>van Le</surname>
              <given-names>Hoa</given-names>
            </name>
            <name name-style="western">
              <surname>Freifeld</surname>
              <given-names>CC</given-names>
            </name>
            <name name-style="western">
              <surname>Brownstein</surname>
              <given-names>JS</given-names>
            </name>
            <name name-style="western">
              <surname>Walderhaug</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Edwards</surname>
              <given-names>IR</given-names>
            </name>
            <name name-style="western">
              <surname>Dasgupta</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>Evaluation of Facebook and Twitter monitoring to detect safety signals for medical products: an analysis of recent FDA safety alerts</article-title>
          <source>Drug Saf</source>
          <year>2017</year>
          <month>04</month>
          <volume>40</volume>
          <issue>4</issue>
          <fpage>317</fpage>
          <lpage>331</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/28044249"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s40264-016-0491-0</pub-id>
          <pub-id pub-id-type="medline">28044249</pub-id>
          <pub-id pub-id-type="pii">10.1007/s40264-016-0491-0</pub-id>
          <pub-id pub-id-type="pmcid">PMC5362648</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Melton</surname>
              <given-names>GB</given-names>
            </name>
            <name name-style="western">
              <surname>Hripcsak</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Automated detection of adverse events using natural language processing of discharge summaries</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2005</year>
          <volume>12</volume>
          <issue>4</issue>
          <fpage>448</fpage>
          <lpage>457</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/15802475"/>
          </comment>
          <pub-id pub-id-type="doi">10.1197/jamia.M1794</pub-id>
          <pub-id pub-id-type="medline">15802475</pub-id>
          <pub-id pub-id-type="pii">M1794</pub-id>
          <pub-id pub-id-type="pmcid">PMC1174890</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="web">
          <article-title>OpenAI API and other LLM APIs response time tracker</article-title>
          <source>GPT for Work by Talarian</source>
          <access-date>2024-03-13</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://gptforwork.com/tools/openai-api-and-other-llm-apis-response-time-tracker">https://gptforwork.com/tools/openai-api-and-other-llm-apis-response-time-tracker</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Leas</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Publication data</article-title>
          <source>Eric Leas</source>
          <access-date>2024-04-29</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.ericleas.com/datasets">https://www.ericleas.com/datasets</ext-link>
          </comment>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
