<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article article-type="letter" dtd-version="2.0" xmlns:xlink="http://www.w3.org/1999/xlink">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMIR</journal-id>
      <journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id>
      <journal-title>Journal of Medical Internet Research</journal-title>
      <issn pub-type="epub">1438-8871</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v26i1e57778</article-id>
      <article-id pub-id-type="pmid">38625723</article-id>
      <article-id pub-id-type="doi">10.2196/57778</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Letter to the Editor</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Letter to the Editor</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Authors’ Reply: “Evaluating GPT-4’s Cognitive Functions Through the Bloom Taxonomy: Insights and Clarifications”</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Leung</surname>
            <given-names>Tiffany</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author">
          <name name-style="western">
            <surname>Herrmann-Werner</surname>
            <given-names>Anne</given-names>
          </name>
          <degrees>MME, Prof Dr Med</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-2413-7047</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Festl-Wietek</surname>
            <given-names>Teresa</given-names>
          </name>
          <degrees>MSc, Dr Rer Nat</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Tübingen Institute for Medical Education</institution>
            <institution>Faculty of Medicine</institution>
            <institution>University of Tübingen</institution>
            <addr-line>Elfriede-Aulhorn-Strasse 10</addr-line>
            <addr-line>Tübingen, 72076</addr-line>
            <country>Germany</country>
            <phone>49 7071 29 73715</phone>
            <email>teresa.festl-wietek@med.uni-tuebingen.de</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-1450-1757</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Holderried</surname>
            <given-names>Friederike</given-names>
          </name>
          <degrees>MME, Dr Med</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-1828-0920</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Herschbach</surname>
            <given-names>Lea</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0005-6378-5073</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Griewatz</surname>
            <given-names>Jan</given-names>
          </name>
          <degrees>MA</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-9731-3171</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author">
          <name name-style="western">
            <surname>Masters</surname>
            <given-names>Ken</given-names>
          </name>
          <degrees>Prof Dr</degrees>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-3425-5020</ext-link>
        </contrib>
        <contrib id="contrib7" contrib-type="author">
          <name name-style="western">
            <surname>Zipfel</surname>
            <given-names>Stephan</given-names>
          </name>
          <degrees>Prof Dr Med</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-1659-4440</ext-link>
        </contrib>
        <contrib id="contrib8" contrib-type="author">
          <name name-style="western">
            <surname>Mahling</surname>
            <given-names>Moritz</given-names>
          </name>
          <degrees>MHBA, Dr Med</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff5" ref-type="aff">5</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-7960-4015</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Tübingen Institute for Medical Education</institution>
        <institution>Faculty of Medicine</institution>
        <institution>University of Tübingen</institution>
        <addr-line>Tübingen</addr-line>
        <country>Germany</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Department of Psychosomatic Medicine and Psychotherapy</institution>
        <institution>University Hospital Tübingen</institution>
        <addr-line>Tübingen</addr-line>
        <country>Germany</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>University Department of Anesthesiology and Intensive Care Medicine</institution>
        <institution>University Hospital Tübingen</institution>
        <addr-line>Tübingen</addr-line>
        <country>Germany</country>
      </aff>
      <aff id="aff4">
        <label>4</label>
        <institution>Medical Education and Informatics Department</institution>
        <institution>College of Medicine and Health Sciences</institution>
        <institution>Sultan Qaboos University</institution>
        <addr-line>Muscat</addr-line>
        <country>Oman</country>
      </aff>
      <aff id="aff5">
        <label>5</label>
        <institution>Department of Diabetology, Endocrinology, Nephrology</institution>
        <institution>Section of Nephrology and Hypertension</institution>
        <institution>University Hospital Tübingen</institution>
        <addr-line>Tübingen</addr-line>
        <country>Germany</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Teresa Festl-Wietek <email>teresa.festl-wietek@med.uni-tuebingen.de</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <year>2024</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>16</day>
        <month>4</month>
        <year>2024</year>
      </pub-date>
      <volume>26</volume>
      <elocation-id>e57778</elocation-id>
      <history>
        <date date-type="received">
          <day>26</day>
          <month>2</month>
          <year>2024</year>
        </date>
        <date date-type="rev-request">
          <day>1</day>
          <month>3</month>
          <year>2024</year>
        </date>
        <date date-type="rev-recd">
          <day>4</day>
          <month>3</month>
          <year>2024</year>
        </date>
        <date date-type="accepted">
          <day>4</day>
          <month>4</month>
          <year>2024</year>
        </date>
      </history>
      <copyright-statement>©Anne Herrmann-Werner, Teresa Festl-Wietek, Friederike Holderried, Lea Herschbach, Jan Griewatz, Ken Masters, Stephan Zipfel, Moritz Mahling. Originally published in the Journal of Medical Internet Research (https://www.jmir.org), 16.04.2024.</copyright-statement>
      <copyright-year>2024</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research, is properly cited. The complete bibliographic information, a link to the original publication on https://www.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://www.jmir.org/2024/1/e57778" xlink:type="simple"/>
      <related-article related-article-type="commentary-article" id="v26i1e56997" ext-link-type="doi" xlink:href="10.2196/56997" vol="26" page="e56997" xlink:type="simple">https://www.jmir.org/2024/1/e56997/</related-article>
      <related-article related-article-type="commentary-article" id="v26i1e52113" ext-link-type="doi" xlink:href="10.2196/52113" vol="26" page="e52113" xlink:type="simple">https://www.jmir.org/2024/1/e52113</related-article>
      <kwd-group>
        <kwd>answer</kwd>
        <kwd>artificial intelligence</kwd>
        <kwd>assessment</kwd>
        <kwd>Bloom’s taxonomy</kwd>
        <kwd>ChatGPT</kwd>
        <kwd>classification</kwd>
        <kwd>error</kwd>
        <kwd>exam</kwd>
        <kwd>examination</kwd>
        <kwd>generative</kwd>
        <kwd>GPT-4</kwd>
        <kwd>Generative Pre-trained Transformer 4</kwd>
        <kwd>language model</kwd>
        <kwd>learning outcome</kwd>
        <kwd>LLM</kwd>
        <kwd>MCQ</kwd>
        <kwd>medical education</kwd>
        <kwd>medical exam</kwd>
        <kwd>multiple-choice question</kwd>
        <kwd>natural language processing</kwd>
        <kwd>NLP</kwd>
        <kwd>psychosomatic</kwd>
        <kwd>question</kwd>
        <kwd>response</kwd>
        <kwd>taxonomy</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <p>We appreciate the thoughtful commentary titled “Evaluating GPT-4’s Cognitive Functions Through the Bloom Taxonomy: Insights and Clarifications” [<xref ref-type="bibr" rid="ref1">1</xref>] and welcome the opportunity to clarify and expand upon our research findings [<xref ref-type="bibr" rid="ref2">2</xref>] regarding GPT-4’s cognitive evaluation using the Bloom taxonomy.</p>
    <p>First, we acknowledge the confusion surrounding the use of the term “difficulty” in our manuscript. Traditionally in educational testing, “difficulty” is quantified by the ratio of correct responses to the number of students taking the test [<xref ref-type="bibr" rid="ref3">3</xref>]; thus, a rating of 1 indicates an extremely simple question (100% correct responses), and a rating of 0 indicates a significantly challenging question (0% correct responses). Throughout the manuscript, we used “difficulty” as a measurement scale.</p>
    <p>Consequently, “higher difficulty” means it is higher on the scale and thus easier. This also applies to Figure 3 of our original article [<xref ref-type="bibr" rid="ref2">2</xref>]. Because “lower” means less easy (ie, closer to 0 on the scale from 0 to 1), it shows that the questions answered correctly were easier compared to those answered incorrectly. Although our use of the measurement “difficulty” is correct, on reflection, we agree that we could have been clearer, and we apologize for any confusion.</p>
    <p>Second, the commentary on GPT-4’s approach to “memory” tasks adds a valuable dimension to our discussion. We agree that GPT-4 “remembers” through technical and programmatic means, highlighting the critical difference between GPT-4’s architecture and human cognitive processes, a distinction that was central to our study.</p>
    <p>However, GPT-4’s material selection is far more complex than a flat-file database with simple mapping (unless the exam questions had been in the testing data, but this is not applicable in our case). Generative tools like GPT-4 have other weaknesses and strengths. For example, they may perform relatively poorly on pure memory-recall problems but excel in topics requiring subtlety and nuanced work. This is demonstrated by GPT-4’s high performance on soft-skill questions from the USMLE (United States Medical Licensing Examination) and AMBOSS [<xref ref-type="bibr" rid="ref4">4</xref>]. Part of our study went further by using the Bloom taxonomy as a framework for tracing the logical process of GPT-4’s <italic>explanations</italic> (not <italic>answers</italic>) and determining the stages at which its errors occurred.</p>
    <p>This discussion underscores a critical point: the complexity of assessing artificial intelligence and the processes underlying the output of models like GPT-4. This methodology allows us to critically examine where GPT-4’s responses fall within a spectrum of cognitive tasks, from simple recall to more complex analytical and evaluative processes.</p>
    <p>Third, while it is quite true that many questions in medical qualifying exams are simple memory-type questions, we see this as a weakness rather than an optimum aiming point. Our understanding is that medical schools are trying to move away from those types of questions, but this remains an area for further research.</p>
    <p>Again, we thank the author for the thoughtful critique of our paper and the resultant continued discussion, which underscores the importance of ongoing dialogue and research into artificial intelligence’s cognitive processes and how they parallel and diverge from human cognition.</p>
  </body>
  <back>
    <app-group/>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">USMLE</term>
          <def>
            <p>United States Medical Licensing Examination</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <fn-group>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>KJ</given-names>
            </name>
          </person-group>
          <article-title>Evaluating GPT-4's cognitive functions through the Bloom taxonomy: insights and clarifications</article-title>
          <source>J Med Internet Res</source>
          <year>2024</year>
          <month>04</month>
          <day>16</day>
          <volume>26</volume>
          <fpage>e56997</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2024/1/e56997/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/56997</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Herrmann-Werner</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Festl-Wietek</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Holderried</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Herschbach</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Griewatz</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Masters</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Zipfel</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Mahling</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Assessing ChatGPT's mastery of Bloom's taxonomy using psychosomatic medicine exam questions: mixed-methods study</article-title>
          <source>J Med Internet Res</source>
          <year>2024</year>
          <month>01</month>
          <day>23</day>
          <volume>26</volume>
          <fpage>e52113</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2024/1/e52113/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/52113</pub-id>
          <pub-id pub-id-type="medline">38261378</pub-id>
          <pub-id pub-id-type="pii">v26i1e52113</pub-id>
          <pub-id pub-id-type="pmcid">PMC10848129</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Möltner</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Schellberg</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Jünger</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Grundlegende quantitative Analysen medizinischer Prüfungen</article-title>
          <source>GMS Zeitschrift Medizinische Ausbildung</source>
          <year>2006</year>
          <volume>23</volume>
          <issue>3</issue>
          <fpage>Doc53</fpage>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Brin</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Sorin</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Vaid</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Soroush</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Glicksberg</surname>
              <given-names>BS</given-names>
            </name>
            <name name-style="western">
              <surname>Charney</surname>
              <given-names>AW</given-names>
            </name>
            <name name-style="western">
              <surname>Nadkarni</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Klang</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Comparing ChatGPT and GPT-4 performance in USMLE soft skill assessments</article-title>
          <source>Sci Rep</source>
          <year>2023</year>
          <month>10</month>
          <day>01</day>
          <volume>13</volume>
          <issue>1</issue>
          <fpage>16492</fpage>
          <pub-id pub-id-type="doi">10.1038/s41598-023-43436-9</pub-id>
          <pub-id pub-id-type="medline">37779171</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41598-023-43436-9</pub-id>
          <pub-id pub-id-type="pmcid">PMC10543445</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
