<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMIR</journal-id>
      <journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id>
      <journal-title>Journal of Medical Internet Research</journal-title>
      <issn pub-type="epub">1438-8871</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v25i1e47621</article-id>
      <article-id pub-id-type="pmid">37713254</article-id>
      <article-id pub-id-type="doi">10.2196/47621</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>The Potential of ChatGPT as a Self-Diagnostic Tool in Common Orthopedic Diseases: Exploratory Study</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Eysenbach</surname>
            <given-names>Gunther</given-names>
          </name>
        </contrib>
        <contrib contrib-type="editor">
          <name>
            <surname>Leung</surname>
            <given-names>Tiffany</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Shreve</surname>
            <given-names>Jacob</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Nakayama</surname>
            <given-names>Masanori</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Inoue</surname>
            <given-names>Akitoshi</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Chrimes</surname>
            <given-names>Dillon</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author">
          <name name-style="western">
            <surname>Kuroiwa</surname>
            <given-names>Tomoyuki</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-9942-1811</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Sarcon</surname>
            <given-names>Aida</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-2763-878X</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Ibara</surname>
            <given-names>Takuya</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-0518-1918</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Yamada</surname>
            <given-names>Eriku</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-8777-9552</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Yamamoto</surname>
            <given-names>Akiko</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-3639-8201</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author">
          <name name-style="western">
            <surname>Tsukamoto</surname>
            <given-names>Kazuya</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-4927-2149</ext-link>
        </contrib>
        <contrib id="contrib7" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Fujita</surname>
            <given-names>Koji</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff4" ref-type="aff">4</xref>
          <xref rid="aff5" ref-type="aff">5</xref>
          <address>
            <institution>Division of Medical Design Innovations</institution>
            <institution>Open Innovation Center, Institute of Research Innovation</institution>
            <institution>Tokyo Medical and Dental University</institution>
            <addr-line>1-5-45 Yushima, Bunkyo-ku</addr-line>
            <addr-line>Tokyo, 1138519</addr-line>
            <country>Japan</country>
            <phone>81 358035279</phone>
            <fax>81 358035281</fax>
            <email>fujiorth@tmd.ac.jp</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-3733-0188</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Department of Orthopaedic and Spinal Surgery</institution>
        <institution>Graduate School of Medical and Dental Sciences</institution>
        <institution>Tokyo Medical and Dental University</institution>
        <addr-line>Tokyo</addr-line>
        <country>Japan</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Division of Orthopedic Surgery Research</institution>
        <institution>Mayo Clinic</institution>
        <addr-line>Rochester, MN</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Department of Surgery</institution>
        <institution>Mayo Clinic</institution>
        <addr-line>Rochester, MN</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff4">
        <label>4</label>
        <institution>Department of Functional Joint Anatomy</institution>
        <institution>Graduate School of Medical and Dental Sciences</institution>
        <institution>Tokyo Medical and Dental University</institution>
        <addr-line>Tokyo</addr-line>
        <country>Japan</country>
      </aff>
      <aff id="aff5">
        <label>5</label>
        <institution>Division of Medical Design Innovations</institution>
        <institution>Open Innovation Center, Institute of Research Innovation</institution>
        <institution>Tokyo Medical and Dental University</institution>
        <addr-line>Tokyo</addr-line>
        <country>Japan</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Koji Fujita <email>fujiorth@tmd.ac.jp</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <year>2023</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>15</day>
        <month>9</month>
        <year>2023</year>
      </pub-date>
      <volume>25</volume>
      <elocation-id>e47621</elocation-id>
      <history>
        <date date-type="received">
          <day>27</day>
          <month>3</month>
          <year>2023</year>
        </date>
        <date date-type="rev-request">
          <day>26</day>
          <month>4</month>
          <year>2023</year>
        </date>
        <date date-type="rev-recd">
          <day>17</day>
          <month>5</month>
          <year>2023</year>
        </date>
        <date date-type="accepted">
          <day>17</day>
          <month>8</month>
          <year>2023</year>
        </date>
      </history>
      <copyright-statement>©Tomoyuki Kuroiwa, Aida Sarcon, Takuya Ibara, Eriku Yamada, Akiko Yamamoto, Kazuya Tsukamoto, Koji Fujita. Originally published in the Journal of Medical Internet Research (https://www.jmir.org), 15.09.2023.</copyright-statement>
      <copyright-year>2023</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research, is properly cited. The complete bibliographic information, a link to the original publication on https://www.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://www.jmir.org/2023/1/e47621" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Artificial intelligence (AI) has gained tremendous popularity recently, especially the use of natural language processing (NLP). ChatGPT is a state-of-the-art chatbot capable of creating natural conversations using NLP. The use of AI in medicine can have a tremendous impact on health care delivery. Although some studies have evaluated ChatGPT’s accuracy in self-diagnosis, there is no research regarding its precision and the degree to which it recommends medical consultations.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>The aim of this study was to evaluate ChatGPT’s ability to accurately and precisely self-diagnose common orthopedic diseases, as well as the degree of recommendation it provides for medical consultations.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>Over a 5-day course, each of the study authors submitted the same questions to ChatGPT. The conditions evaluated were carpal tunnel syndrome (CTS), cervical myelopathy (CM), lumbar spinal stenosis (LSS), knee osteoarthritis (KOA), and hip osteoarthritis (HOA). Answers were categorized as either correct, partially correct, incorrect, or a differential diagnosis. The percentage of correct answers and reproducibility were calculated. The reproducibility between days and raters was calculated using the Fleiss κ coefficient. Answers that recommended that the patient seek medical attention were recategorized according to the strength of the recommendation as defined by the study.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>The ratios of correct answers were 25/25, 1/25, 24/25, 16/25, and 17/25 for CTS, CM, LSS, KOA, and HOA, respectively. The ratios of incorrect answers were 23/25 for CM and 0/25 for all other conditions. The reproducibility between days was 1.0, 0.15, 0.7, 0.6, and 0.6 for CTS, CM, LSS, KOA, and HOA, respectively. The reproducibility between raters was 1.0, 0.1, 0.64, –0.12, and 0.04 for CTS, CM, LSS, KOA, and HOA, respectively. Among the answers recommending medical attention, the phrases “essential,” “recommended,” “best,” and “important” were used. Specifically, “essential” occurred in 4 out of 125, “recommended” in 12 out of 125, “best” in 6 out of 125, and “important” in 94 out of 125 answers. Additionally, 7 out of the 125 answers did not include a recommendation to seek medical attention.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>The accuracy and reproducibility of ChatGPT to self-diagnose five common orthopedic conditions were inconsistent. The accuracy could potentially be improved by adding symptoms that could easily identify a specific location. Only a few answers were accompanied by a strong recommendation to seek medical attention according to our study standards. Although ChatGPT could serve as a potential first step in accessing care, we found variability in accurate self-diagnosis. Given the risk of harm with self-diagnosis without medical follow-up, it would be prudent for an NLP to include clear language alerting patients to seek expert medical opinions. We hope to shed further light on the use of AI in a future clinical study.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>ChatGPT</kwd>
        <kwd>generative pretrained transformer</kwd>
        <kwd>natural language processing</kwd>
        <kwd>artificial intelligence</kwd>
        <kwd>chatbot</kwd>
        <kwd>diagnosis</kwd>
        <kwd>self-diagnosis</kwd>
        <kwd>accuracy</kwd>
        <kwd>precision</kwd>
        <kwd>language model</kwd>
        <kwd>orthopedic disease</kwd>
        <kwd>AI model</kwd>
        <kwd>health information</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>Recently, the field of artificial intelligence (AI) has made remarkable progress. The applications of AI in health care have also gained attention [<xref ref-type="bibr" rid="ref1">1</xref>-<xref ref-type="bibr" rid="ref5">5</xref>]. One of the most popular forms of AI involves using a natural language processing (NLP) system. In medicine, researchers have used NLP to extract unstructured data from medical records, followed by organization of the output [<xref ref-type="bibr" rid="ref6">6</xref>-<xref ref-type="bibr" rid="ref9">9</xref>]. Some have advocated for the use of an NLP as a prognostic or diagnostic tool [<xref ref-type="bibr" rid="ref10">10</xref>-<xref ref-type="bibr" rid="ref12">12</xref>]; however, further investigation is warranted. ChatGPT (OpenAI, San Francisco, CA, USA) was released in November 2022. ChatGPT is a sophisticated chatbot that uses an NLP model capable of both supervised and reinforcement learning; it can understand the context of a sentence from only a few words. ChatGPT is also thought to possess the ability to translate languages and analyze customer experience if implemented as a survey [<xref ref-type="bibr" rid="ref13">13</xref>]. Hence, its popularity has been growing rapidly [<xref ref-type="bibr" rid="ref14">14</xref>]. Despite not being explicitly designed for health care, ChatGPT has also been increasingly used in health care contexts [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref15">15</xref>]. ChatGPT can be helpful in aiding health care providers in formulating differential diagnoses or assisting patients in self-diagnosing conditions before seeking medical attention. Nonetheless, it is still unclear whether digital self-diagnostic tools truly provide health benefits to patients, and multiple studies have raised concerns about their accuracy in triage and diagnosis [<xref ref-type="bibr" rid="ref16">16</xref>-<xref ref-type="bibr" rid="ref20">20</xref>]. 
If we leap into the realm of AI and its health care applications, we must first understand whether ChatGPT can accurately and precisely assist with self-diagnosis to reduce the risk of error, which would cause harm to the patient. The clinical significance of this application of ChatGPT is that patients would have access to a readily available platform to diagnose a condition correctly and later seek medical attention for management. However, few studies have evaluated the accuracy of ChatGPT’s ability to support self-diagnosis [<xref ref-type="bibr" rid="ref21">21</xref>,<xref ref-type="bibr" rid="ref22">22</xref>].</p>
      <p>In addition to accuracy, it is equally important to evaluate precision, since it is challenging to rely on a self-diagnostic tool that provides inconsistent answers across different days and users. Additionally, an AI chatbot is not a substitute for medical care and should appropriately recommend seeking medical consultation after self-diagnosis. However, there is no research evaluating both the precision of ChatGPT’s responses and the degree to which it recommends medical attention.</p>
      <p>Therefore, the purpose of this study was to assess the accuracy and precision of ChatGPT in self-diagnosis and to assess the degree of medical provider recommendation in its answers. We evaluated five common orthopedic symptoms/diseases since orthopedic complaints are very common in practice as they comprise up to 26% of the reasons why patients seek care [<xref ref-type="bibr" rid="ref23">23</xref>]. For each of the diseases, we submitted a few characteristic symptoms to ChatGPT, and then we evaluated the accuracy (percentage of correct responses) and precision of the chatbot’s responses.</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Ethical Considerations</title>
        <p>Ethical review was not required since our research uses neither humans, animals, nor any of their information.</p>
      </sec>
      <sec>
        <title>Study Design</title>
        <p>Over a 5-day period (February 20 to 24, 2023, between the hours of 12 AM and 3 PM), the study authors (TI, EY, AY, KT, and KF) submitted the same questions to ChatGPT (GPT version 3.5) (see <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> for an example). Each question was submitted daily to evaluate the variation in responses. At the end of the study period, all answers generated by the chatbot were recorded and sent to one study author (TK) for analysis. Additionally, each author who questioned ChatGPT provided the details of the operating system (OS) and browser software they used when conducting this experiment.</p>
      </sec>
      <sec>
        <title>Diseases and Questions</title>
        <p>We evaluated five common orthopedic diseases: carpal tunnel syndrome (CTS), cervical myelopathy (CM), lumbar spinal stenosis (LSS), knee osteoarthritis (KOA), and hip osteoarthritis (HOA). These diseases were chosen as they were felt to contain a wide variety of symptoms from joint and lower back pain to neuropathy, which are typical reasons for seeking care [<xref ref-type="bibr" rid="ref23">23</xref>]. To help standardize a uniform set of questions, five orthopedic surgeons and one physical therapist engaged in discussions with English-speaking surgeons to obtain an expert consensus on common symptoms and plain-language questions. We also refined each question by using the Mayo Clinic [<xref ref-type="bibr" rid="ref24">24</xref>,<xref ref-type="bibr" rid="ref25">25</xref>], Cleveland Clinic [<xref ref-type="bibr" rid="ref26">26</xref>,<xref ref-type="bibr" rid="ref27">27</xref>], and Johns Hopkins Medicine [<xref ref-type="bibr" rid="ref28">28</xref>,<xref ref-type="bibr" rid="ref29">29</xref>] websites. The initial questions are listed in <xref ref-type="boxed-text" rid="box1">Textbox 1</xref>.</p>
        <p>To identify means of improving the accuracy of ChatGPT’s self-assessment, nine additional questions (<xref ref-type="boxed-text" rid="box2">Textbox 2</xref>) were included in the study over a 5-day period (April 30 to May 4, 2023). Questions 1a, 2a, 3a, 4a, and 5a were in addition to the original questions, which required ChatGPT to provide a primary diagnosis along with five potential differential diagnoses. Question 2b was designed for cases where subjective symptoms of the patient with CM were limited to the upper extremities. Questions 1c, 4c, and 5c were rephrased due to concerns that questions 4 and 5, unlike question 1, began with “My knee” or “My hip,” which might have reduced the accuracy and precision of the answers.</p>
        <boxed-text id="box1" position="float">
          <title>Initial questions to assess five common orthopedic diseases.</title>
          <p>1. I have tingling and pain in my fingers (especially at night). I also have difficulty picking up small objects. What is this disease?</p>
          <p>2. I have numbness in my hands. I also have difficulty doing fine movements to handle small objects, such as buttoning a shirt. I have an unsteady walk (especially when going downstairs). What is this disease?</p>
          <p>3. I have pain in my lower back. I also have numbness and pain in my buttocks and calves. The pain increases when I have been walking for a while but improves when I lean slightly forward. What is this disease?</p>
          <p>4. My knee is swollen and hurts when I walk. When bending my knee, I feel stiff and hear cracking. What is this disease?</p>
          <p>5. My hip hurts when I walk. When moving my hip, I feel stiff and hear cracking. What is this disease?</p>
        </boxed-text>
        <boxed-text id="box2" position="float">
          <title>Refinement of questions to improve the accuracy of assessment.</title>
          <p>Q1a. I have tingling and pain in my fingers (especially at night). I also have difficulty picking up small objects. What is this disease? Can you give me a primary diagnosis and a list of five potential differential diagnoses?</p>
          <p>Q1c. My fingers tingle and hurt (especially at night). I also have difficulty picking up small objects. What is this disease?</p>
          <p>Q2a. I have numbness in my hands. I also have difficulty doing fine movements to handle small objects, such as buttoning a shirt. I have an unsteady walk (especially when going downstairs). What is this disease? Can you give me a primary diagnosis and a list of five potential differential diagnoses?</p>
          <p>Q2b. I have numbness in my hands. I also have difficulty doing fine movements to handle small objects, such as buttoning a shirt. What is this disease?</p>
          <p>Q3a. I have pain in my lower back. I also have numbness and pain in my buttocks and calves. The pain increases when I have been walking for a while but improves when I lean slightly forward. What is this disease? Can you give me a primary diagnosis and a list of five potential differential diagnoses?</p>
          <p>Q4a. My knee is swollen and hurts when I walk. When bending my knee, I feel stiff and hear cracking. What is this disease? Can you give me a primary diagnosis and a list of five potential differential diagnoses?</p>
          <p>Q4c. I have knee swelling and pain when I walk. When bending my knee, I feel stiff and hear cracking. What is this disease?</p>
          <p>Q5a. My hip hurts when I walk. When moving my hip, I feel stiff and hear cracking. What is this disease? Can you give me a primary diagnosis and a list of five potential differential diagnoses?</p>
          <p>Q5c. I have hip swelling and pain when I walk. When moving my hip, I feel stiff and hear cracking. What is this disease?</p>
        </boxed-text>
      </sec>
      <sec>
        <title>Accuracy Assessment</title>
        <p>One of the study authors (TK), who did not pose questions to ChatGPT, evaluated the responses of ChatGPT (see <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). The responses were categorized as shown in <xref rid="figure1" ref-type="fig">Figure 1</xref>. Briefly, they were either (1) one solitary diagnosis, (2) hierarchical diagnoses with other potential causes, or (3) multiple diagnoses. “Solitary diagnosis” encompassed cases where only one possible diagnosis was raised in the response. “Hierarchical diagnoses” involved cases where a single most likely diagnosis was provided in the response, followed by several other possible diagnoses. “Multiple diagnoses” involved cases where multiple possible diagnoses were presented without hierarchy in the response. If an answer included one solitary diagnosis or hierarchical diagnoses, it was then evaluated for correctness. If a solitary diagnosis or the top diagnosis in the hierarchical diagnoses was correct, the answer was considered correct; if the correct diagnosis was included among the other possible diagnoses in the hierarchical diagnoses, it was considered partially correct. In the case of multiple diagnoses, the response was categorized as a differential diagnosis. Lastly, if neither of the prior phrases occurred in the response, it was categorized as incorrect.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Accuracy assessment as defined by the study. After submitting the study questions to ChatGPT, the responses generated were either categorized as “solitary diagnosis,” “hierarchical diagnosis,” or “multiple diagnoses.” The correctness of the response was evaluated, except for the “multiple diagnoses” as it was considered its own category.</p>
          </caption>
          <graphic xlink:href="jmir_v25i1e47621_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Precision Assessment</title>
        <p>The precision assessment is shown in <xref rid="figure2" ref-type="fig">Figure 2</xref>. To assess the variability of responses, we evaluated the precision of the chatbot’s ability to diagnose each disease. The same three responses were seen as described above. We evaluated the number of times a solitary disease or a differential diagnosis was answered daily.</p>
        <p>Additionally, the incorrect answer ratio within answers that presented solitary or hierarchical diagnoses was calculated separately. Note that differential diagnoses were excluded from the denominator.</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Precision assessment as defined by the study. Similar to the format used for accuracy assessment shown in Figure 1, ChatGPT either reported a “solitary diagnosis,” “hierarchical diagnosis,” or “multiple diagnoses.” For either a “solitary diagnosis” or “hierarchical diagnosis,” the most probable diagnosis was categorized into the reported disease (ie, Disease A, B, C, D). Multiple diagnoses comprised a separate category. The responses were evaluated daily.</p>
          </caption>
          <graphic xlink:href="jmir_v25i1e47621_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Recommendations</title>
        <p>To evaluate the extent to which ChatGPT recommended seeking care, we searched for words that included the terms “medical,” “health care,” “doctor,” or similar terminology. Subcategories were analyzed by the percentage of each phrase reported. We evaluated the strength of each phrase. We defined a strong recommendation when the phrases included the word “essential” and/or “recommended”; other phrases were considered to indicate a weaker recommendation.</p>
        <p>Furthermore, the percentage of the number of words in an answer that was used to recommend seeking care was calculated using the following equation: number of words used to recommend seeking care/total number of words.</p>
      </sec>
      <sec>
        <title>Statistical Analysis</title>
        <p>The precision and accuracy were calculated separately for each disease.</p>
        <p>Accuracy was assessed using the “correct answer ratio,” which represents the average percentage of correct answers over the 5-day period. This value was obtained by using the average of the values reported by each rater. Similarly, the “error answer ratio” was defined as the average percentage of incorrect answers observed during the 5-day period.</p>
        <p>For precision evaluation, the reproducibility between days and raters was evaluated separately. The number of raters was determined to be five, which was equal to the number of questions according to a previous study, which stated that the number of raters in a study assessing reliability between raters should be the same as the number of subjects [<xref ref-type="bibr" rid="ref30">30</xref>]. For accuracy, the Fleiss κ coefficient between the categorical variables of the five answers in one rater was calculated and the median of the five values in five raters was regarded as the reproducibility between days [<xref ref-type="bibr" rid="ref31">31</xref>]. For precision, the Fleiss κ coefficient between the answers on the same day was calculated and the mean over the 5 days served as the reproducibility between raters [<xref ref-type="bibr" rid="ref31">31</xref>]. Fleiss κ coefficients were evaluated as follows: &#60; 0, poor; 0.01-0.20, slight; 0.21-0.40, fair; 0.41-0.60, moderate; 0.61-0.80, substantial; and 0.81-1.00, almost perfect [<xref ref-type="bibr" rid="ref32">32</xref>].</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Summary of Answers to the Questions</title>
        <p>The summary of answers to the initial questions is presented in <xref ref-type="table" rid="table1">Table 1</xref> and the full text of the answers is shown in <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>. In response to the question regarding CTS, ChatGPT diagnosed CTS in all answers. In response to the question regarding CM, ChatGPT either diagnosed peripheral neuropathy, multiple sclerosis, a neurological disorder, or presented differential diagnoses. Regarding the question about LSS, ChatGPT diagnosed either LSS or sciatica, or presented differential diagnoses. Regarding the questions about KOA and HOA, ChatGPT diagnosed KOA and HOA, respectively, or presented differential diagnoses.</p>
        <p>The OS and browser software used by each rater when using ChatGPT are presented in <xref ref-type="table" rid="table2">Table 2</xref>.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Diagnoses provided by ChatGPT in response to questions categorized by rater and day.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="180"/>
            <col width="150"/>
            <col width="150"/>
            <col width="150"/>
            <col width="150"/>
            <col width="190"/>
            <thead>
              <tr valign="top">
                <td colspan="2">Question</td>
                <td>Day 1</td>
                <td>Day 2</td>
                <td>Day 3</td>
                <td>Day 4</td>
                <td>Day 5</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="7">
                  <bold>I have tingling and pain in my fingers (especially at night). I also have difficulty picking up small objects. What is this disease?</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Rater 1</td>
                <td>CTS<sup>a</sup></td>
                <td>CTS</td>
                <td>CTS</td>
                <td>CTS</td>
                <td>CTS</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Rater 2</td>
                <td>CTS</td>
                <td>CTS</td>
                <td>CTS</td>
                <td>CTS</td>
                <td>CTS</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Rater 3</td>
                <td>CTS</td>
                <td>CTS</td>
                <td>CTS</td>
                <td>CTS</td>
                <td>CTS</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Rater 4</td>
                <td>CTS</td>
                <td>CTS</td>
                <td>CTS</td>
                <td>CTS</td>
                <td>CTS</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Rater 5</td>
                <td>CTS</td>
                <td>CTS</td>
                <td>CTS</td>
                <td>CTS</td>
                <td>CTS</td>
              </tr>
              <tr valign="top">
                <td colspan="7">
                  <bold>I have numbness in my hands. I also have difficulty doing fine movements to handle small objects, such as buttoning a shirt. I have an unsteady walk (especially when going downstairs). What is this disease?</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Rater 1</td>
                <td>PN<sup>b</sup></td>
                <td>DD<sup>c</sup></td>
                <td>MS<sup>d</sup></td>
                <td>MS</td>
                <td>MS</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Rater 2</td>
                <td>DD</td>
                <td>MS</td>
                <td>MS</td>
                <td>DD</td>
                <td>PN</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Rater 3</td>
                <td>MS</td>
                <td>PN</td>
                <td>PN</td>
                <td>DD</td>
                <td>MS</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Rater 4</td>
                <td>PN</td>
                <td>PN</td>
                <td>PN</td>
                <td>DD</td>
                <td>ND<sup>e</sup></td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Rater 5</td>
                <td>PN</td>
                <td>PN</td>
                <td>PN</td>
                <td>PN</td>
                <td>PN</td>
              </tr>
              <tr valign="top">
                <td colspan="7">
                  <bold>I have pain in my lower back. I also have numbness and pain in my buttocks and calves. The pain increases when I have been walking for a while but improves when I lean slightly forward. What is this disease?</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Rater 1</td>
                <td>LSS<sup>f</sup></td>
                <td>LSS</td>
                <td>LSS</td>
                <td>LSS</td>
                <td>LSS</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Rater 2</td>
                <td>LSS</td>
                <td>SC<sup>g</sup></td>
                <td>LSS</td>
                <td>LSS</td>
                <td>SC</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Rater 3</td>
                <td>LSS</td>
                <td>LSS</td>
                <td>LSS</td>
                <td>LSS</td>
                <td>LSS</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Rater 4</td>
                <td>DD</td>
                <td>LSS</td>
                <td>LSS</td>
                <td>LSS</td>
                <td>LSS</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Rater 5</td>
                <td>LSS</td>
                <td>LSS</td>
                <td>LSS</td>
                <td>LSS</td>
                <td>LSS</td>
              </tr>
              <tr valign="top">
                <td colspan="7">
                  <bold>My knee is swollen and hurts when I walk. When bending my knee, I feel stiff and hear cracking. What is this disease?</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Rater 1</td>
                <td>KOA<sup>h</sup></td>
                <td>KOA</td>
                <td>KOA</td>
                <td>KOA</td>
                <td>KOA</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Rater 2</td>
                <td>KOA</td>
                <td>DD</td>
                <td>DD</td>
                <td>KOA</td>
                <td>DD</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Rater 3</td>
                <td>KOA</td>
                <td>KOA</td>
                <td>KOA</td>
                <td>DD</td>
                <td>KOA</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Rater 4</td>
                <td>DD</td>
                <td>DD</td>
                <td>DD</td>
                <td>DD</td>
                <td>DD</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Rater 5</td>
                <td>KOA</td>
                <td>KOA</td>
                <td>KOA</td>
                <td>KOA</td>
                <td>KOA</td>
              </tr>
              <tr valign="top">
                <td colspan="7">
                  <bold>My hip hurts when I walk. When moving my hip, I feel stiff and hear cracking. What is this disease?</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Rater 1</td>
                <td>HOA<sup>i</sup></td>
                <td>HOA</td>
                <td>HOA</td>
                <td>HOA</td>
                <td>HOA</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Rater 2</td>
                <td>DD</td>
                <td>HOA</td>
                <td>HOA</td>
                <td>DD</td>
                <td>HOA</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Rater 3</td>
                <td>HOA</td>
                <td>HOA</td>
                <td>HOA</td>
                <td>DD</td>
                <td>HOA</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Rater 4</td>
                <td>DD</td>
                <td>DD</td>
                <td>DD</td>
                <td>DD</td>
                <td>DD</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Rater 5</td>
                <td>HOA</td>
                <td>HOA</td>
                <td>HOA</td>
                <td>HOA</td>
                <td>HOA</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>CTS: carpal tunnel syndrome.</p>
            </fn>
            <fn id="table1fn2">
              <p><sup>b</sup>PN: peripheral neuropathy.</p>
            </fn>
            <fn id="table1fn3">
              <p><sup>c</sup>DD: differential diagnosis; categorized when ChatGPT provided a differential diagnosis with no hierarchy.</p>
            </fn>
            <fn id="table1fn4">
              <p><sup>d</sup>MS: multiple sclerosis.</p>
            </fn>
            <fn id="table1fn5">
              <p><sup>e</sup>ND: neurological disorder; judged as a correct answer because, although it is not the disease that was assumed, it is not an error.</p>
            </fn>
            <fn id="table1fn6">
              <p><sup>f</sup>LSS: lumbar spinal stenosis.</p>
            </fn>
            <fn id="table1fn7">
              <p><sup>g</sup>SC: sciatica; judged as a correct answer because, although it is not the disease that was assumed, it is not an error.</p>
            </fn>
            <fn id="table1fn8">
              <p><sup>h</sup>KOA: knee osteoarthritis.</p>
            </fn>
            <fn id="table1fn9">
              <p><sup>i</sup>HOA: hip osteoarthritis.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Operating system and browser software used by each rater.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="200"/>
            <col width="240"/>
            <col width="560"/>
            <thead>
              <tr valign="top">
                <td>Rater</td>
                <td>Operating system</td>
                <td>Browser software</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>1</td>
                <td>Windows 10</td>
                <td>Google Chrome</td>
              </tr>
              <tr valign="top">
                <td>2</td>
                <td>Windows 11</td>
                <td>Google Chrome</td>
              </tr>
              <tr valign="top">
                <td>3</td>
                <td>iOS 15.5</td>
                <td>Safari</td>
              </tr>
              <tr valign="top">
                <td>4</td>
                <td>Mac Monterey 12.1</td>
                <td>Google Chrome</td>
              </tr>
              <tr valign="top">
                <td>5</td>
                <td>Mac Monterey 12.1</td>
                <td>Safari</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
      <sec>
        <title>Accuracy Assessment</title>
        <p>The correct answer ratios varied for each disease (<xref rid="figure3" ref-type="fig">Figure 3</xref>). The ratios were 25/25 (100%) for CTS, 1/25 (4%) for CM, 24/25 (96%) for LSS, 16/25 (64%) for KOA, and 17/25 (68%) for HOA. Only CM had a high error answer ratio (23/25, 92%), whereas the error ratio was 0/25 (0%) for the other conditions.</p>
        <p>The error answer ratio within answers that presented solitary diagnoses was 94% (16/17) for CM only and 0% for the others (0/18 for CTS, 0/20 for LSS, 0/7 for KOA, and 0/7 for HOA). The error answer ratio within answers that presented hierarchical diagnoses was 100% (7/7) for CM only and 0% for the others (0/7 for CTS, 0/4 for LSS, 0/9 for KOA, and 0/9 for HOA).</p>
        <p>The full text of the answers to the additional questions is shown in <xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref>. The correct answer ratios for the additional questions (<xref ref-type="boxed-text" rid="box2">Textbox 2</xref>) varied for each disease (<xref rid="figure4" ref-type="fig">Figure 4</xref>): 24/25 (96%) for Q1a (CTS), 24/25 (96%) for Q1c (CTS), 0/25 (0%) for Q2a (CM), 1/25 (4%) for Q2b (CM), 25/25 (100%) for Q3a (LSS), 22/25 (88%) for Q4a (KOA), 23/25 (92%) for Q4c (KOA), 23/25 (92%) for Q5a (HOA), and 22/25 (88%) for Q5c (HOA). Only Q2a (CM) and Q2b (CM) received incorrect answers (13/25, 52% and 23/25, 92%, respectively) and other questions received no incorrect answers. In the answers to Q2b, CTS, which was not presented in the answer for the original CM question (Question 2), appeared with a rate of 80%.</p>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>Correct answer ratio of each of the tested orthopedic conditions. CM had the highest incorrect answer choice and CTS had the highest percent correct. CM: cervical myelopathy; CTS: carpal tunnel syndrome; HOA: hip osteoarthritis; KOA: knee osteoarthritis; LSS: lumbar spinal stenosis.</p>
          </caption>
          <graphic xlink:href="jmir_v25i1e47621_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure4" position="float">
          <label>Figure 4</label>
          <caption>
            <p>Correct answer ratio of the answers to the additional questions (see Textbox 2). CM: cervical myelopathy; CTS: carpal tunnel syndrome; HOA: hip osteoarthritis; KOA: knee osteoarthritis; LSS: lumbar spinal stenosis.</p>
          </caption>
          <graphic xlink:href="jmir_v25i1e47621_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>Except for the answers to Q2a (CM) and Q2b (CM), all other answers showed high percentages of correct answer ratios. Approximately half of the answers to Q2a (CM) were partially correct.</p>
      </sec>
      <sec>
        <title>Precision Assessment</title>
        <p><xref rid="figure5" ref-type="fig">Figure 5</xref> shows the ratio of presented diseases and differential diagnoses among the answers. Reproducibility between days was 1.0, 0.15, 0.7, 0.6, and 0.6 for CTS, CM, LSS, KOA, and HOA, respectively. Reproducibility between the raters was 1.0, 0.1, 0.64, –0.12, and 0.04 for CTS, CM, LSS, KOA, and HOA, respectively. Daily and per-rater Fleiss κ and P values are listed in <xref ref-type="supplementary-material" rid="app4">Multimedia Appendix 4</xref>.</p>
        <fig id="figure5" position="float">
          <label>Figure 5</label>
          <caption>
            <p>Precision assessment. The ratios of the responses presented by ChatGPT are shown. A reproducibility coefficient of 1.00 was defined as perfect precision. CM: cervical myelopathy; CTS: carpal tunnel syndrome; HOA: hip osteoarthritis; KOA: knee osteoarthritis; LSS: lumbar spinal stenosis.</p>
          </caption>
          <graphic xlink:href="jmir_v25i1e47621_fig5.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Recommendations</title>
        <p><xref ref-type="table" rid="table3">Table 3</xref> shows the results on recommendations. The following key phrases were found: “essential,” “recommended,” “best,” and “important.” Many of the answers included only the word “important,” whereas only a few answers used strong words such as “essential” and “recommended.” Additionally, some answers did not provide any recommendations.</p>
        <p>Overall, 16 out of 125 (12.8%) answers contained a word count percentage of 20% or more, indicating a recommendation for medical consultation, whereas 71 out of 125 (56.8%) answers had a percentage between 10% and 20%, 31 out of 125 (24.8%) of all answers had a percentage between 0% and 10%, and 7 out of 125 (5.6%) answers did not include any of these words.</p>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Phrases used to recommend seeking medical care (N=125).</p>
          </caption>
          <table border="1" rules="groups" cellpadding="5" frame="hsides" width="1000" cellspacing="0">
            <col width="400"/>
            <col width="600"/>
            <thead>
              <tr valign="top">
                <td>Phrase</td>
                <td>Frequency of use, n (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Essential</td>
                <td>4 (3.2)</td>
              </tr>
              <tr valign="top">
                <td>Recommend</td>
                <td>12 (9.6)</td>
              </tr>
              <tr valign="top">
                <td>Best</td>
                <td>8 (6.4)</td>
              </tr>
              <tr valign="top">
                <td>Important</td>
                <td>94 (75.2)</td>
              </tr>
              <tr valign="top">
                <td>None</td>
                <td>7 (5.6)</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>This is the first study to evaluate ChatGPT’s ability to self-diagnose. Over a 5-day period, we submitted common symptoms to ChatGPT and evaluated its response for accuracy and precision. Generally, ChatGPT had the ability to generate high correct answer ratios, with the exception of the self-diagnosis of CM. Reproducibility was variable and disease-dependent. These results suggest that ChatGPT is inconsistent in both accuracy and precision to self-diagnose in its current form. By having ChatGPT present the five possible differential diagnoses, the ratio of correct answers for the questions on KOA and HOA was increased and the error answer ratio for the question on CM was decreased. Additionally, avoiding starting the question with “My knee” or “My hip” further improved the ratio of correct answers for KOA and HOA.</p>
      </sec>
      <sec>
        <title>Comparison With Previous Studies</title>
        <p>Hirosawa et al [<xref ref-type="bibr" rid="ref21">21</xref>] used ChatGPT to formulate a differential diagnosis. They found a 53.3% correct answer ratio. In our study, the correct answer ratio was similar in range (66.4%). However, there are several key differences between our studies. First, we evaluated orthopedic conditions, whereas Hirosawa et al [<xref ref-type="bibr" rid="ref21">21</xref>] focused on systemic symptoms as pertinent to diseases seen by an internist. Since systemic diseases are not site-specific (ie, fever and rash seen with lupus), this could potentially explain their lower accuracy score. Their submissions also included objective findings such as physical exam and vital signs. We deliberately omitted such findings to simulate a natural setting in which a patient would use ChatGPT for self-diagnosis. This promotes the generalizability of the questions from a patient’s perspective. However, a study that evaluates the inclusion of objective findings and differences in accuracy/precision would be helpful in the future. Johnson et al [<xref ref-type="bibr" rid="ref22">22</xref>] conducted an extensive inquiry with ChatGPT posing numerous medical questions and showed that the median accuracy of answers was fairly high. One might assume that their results demonstrated relatively higher accuracy compared to that obtained in our study and in that of Hirosawa et al [<xref ref-type="bibr" rid="ref21">21</xref>] because the questions were more medically detailed. However, the mean accuracy was slightly lower than its median, and the authors discussed that this difference reflected multiple surprisingly incorrect answers provided by ChatGPT. Since we also found significant variation in accuracy among answers in our study, the discussion of Johnson et al [<xref ref-type="bibr" rid="ref22">22</xref>] aligns with and supports our results.</p>
      </sec>
      <sec>
        <title>Accuracy Assessment</title>
        <p>CTS (100%) and LSS (96%) had the highest correct answer ratios, which were much lower for KOA (64%) and HOA (68%). One potential cause for this difference is that both KOA and HOA did not include disease-specific symptoms despite typical symptoms provided to ChatGPT. This suggests that ChatGPT was unable to narrow down the answers. Interestingly, the error answer ratios were 0% in all four diseases. Of the diseases, CM had the lowest correct answer ratio at only 4%. Given the symptoms, ChatGPT generated several potential answers, which included a neurological disorder, peripheral nerve disorder, and multiple sclerosis. Unfortunately, CM was not identified. One potential reason for this could be attributed to the multifocal symptoms of CM (involving both the hands and feet), unlike the other conditions that may be more regional (ie, CTS). This suggests that ChatGPT is incapable of localizing a disease that is multifocal. Another potential reason is that the site of the disease and the site of symptom manifestation are not always the same in cases of CM. In this study, the question regarding CM did not include any symptoms specific to the neck. In contrast, the question concerning LSS involved lumbar pain symptoms. This disparity may have caused the variation in the ratios of correct answers observed between these two conditions.</p>
        <p>The low correct answer ratio in our study would suggest a risk of misdiagnosis and potential harm to the patient if this NLP tool is used in its current form. However, ChatGPT is a fine-tuned version of a chatbot, in which supervised and reinforcement learning have been added to version GPT-3; thus, if ChatGPT had been educated on specific medical terms during this additional learning, a far higher degree of accuracy could have been achieved by incorporating those terms into our questions. Otherwise, this could have been overcome by including more site-specific symptoms when submitting the questions. We plan to conduct an additional study to determine which question formats/words will increase the accuracy of self-diagnostic support provided by ChatGPT.</p>
        <p>Although we asked simple and concise questions in this study, patients may ask more complex and difficult questions. It has been suggested that ChatGPT lacks “factual correctness” [<xref ref-type="bibr" rid="ref33">33</xref>] and may provide inaccurate information, especially when tasked to provide a specific answer to an esoteric question [<xref ref-type="bibr" rid="ref34">34</xref>]. To achieve a higher ratio of correct diagnoses for complex diseases in the context of self-diagnosis supported by ChatGPT, the questions may need to be stratified in a similar manner to that of an actual medical interview. Unfortunately, as seen in Multimedia Appendixes 2 and 3, at least the 3.5 version of ChatGPT did not attempt stratification (ie, ask clarifying questions back to the user) to increase the accuracy of the estimated diagnosis. However, the 4.0 version may return more in-depth questions. Otherwise, it is recommended that when developing an AI chat system specialized for medical self-diagnosis, it would be beneficial to incorporate a system that confidently asks follow-up questions to improve the accuracy of estimated diagnoses. Additionally, there is another notable concern that not only general users could be misinformed by ChatGPT, but even surgeons and physicians could pick up fraudulent papers generated by ChatGPT when seeking standardized medical responses [<xref ref-type="bibr" rid="ref35">35</xref>-<xref ref-type="bibr" rid="ref37">37</xref>]. This highlights the need for constant oversight of AI systems both in terms of design and usage. It is essential to involve government regulations and restrictions as well as conscientiousness from AI designers and the authors of the papers [<xref ref-type="bibr" rid="ref38">38</xref>].</p>
      </sec>
      <sec>
        <title>Precision Assessment</title>
        <p>Reproducibility varied and ranged from “poor” to “almost perfect,” even though we entered the same questions every time. The cause of this variability was unclear since the submissions were standardized at a fixed time and replicated among the raters. While the reproducibility between days exhibited moderate agreement for both KOA and HOA, the reproducibility between raters exhibited poor and slight agreement for KOA and HOA, respectively. The variability in responses may be a deliberate feature of ChatGPT since it mostly functions as a chatbot for social purposes. In this platform, it may be acceptable to have variable answers. However, if we are to apply this algorithm to health care, this variability may not be acceptable as it increases the risk of diagnostic error as made evident in the results. In the current form, ChatGPT has low reliability in self-diagnosing five common orthopedic conditions. It is also possible that ChatGPT may improve its reliability through learning, although this warrants further investigation. We could not detect any trends that would have caused differences in answers depending on the OS and browser software used. However, these factors might have decreased the reproducibility between raters.</p>
      </sec>
      <sec>
        <title>Recommendation for Medical Consultation</title>
        <p>Nearly 5.6% of the generated answers omitted any recommendation to seek care. Since ChatGPT is not a substitute for medical advice, it would be prudent for the chatbot to counsel the patient to seek medical attention for diagnostic validation and management. Without this, the patient is left without guidance on the appropriate next steps. Some may think that this language is often written by a software or program to avoid medical liability should an error occur [<xref ref-type="bibr" rid="ref17">17</xref>]. Since ChatGPT has inconsistent diagnostic capability, one would consider this a necessary feature should this be applied to health care. Although 69.6% of the answers recommended medical consultation for more than 10% of the total words, only 12.8% of the answers included a strong recommendation as set by the study standards with phrasing including either the term “essential” or “recommended.” The other phrases could be interpreted as rather vague since they indirectly recommend seeking care. Without direct language, it is possible that the patient is left confused after self-diagnosis, or worse, experiences harm from a misdiagnosis. In fact, ChatGPT explicitly provides a disclaimer regarding these potential harms. Since it is not exclusively designed as a self-diagnostic tool for medical support, the inclusion of the disclaimer is understandably necessary.
However, instead of solely focusing on limiting the use of AI chatbots for health care purposes to reduce the potential risk to users, several papers advocate that the following would be effective: (1) understanding and presenting the issues associated with the use of AI chatbots for health care purposes; (2) improving the technology and adapting it to appropriate health care applications; and (3) advocating for shared accountability and fostering collaboration among developers, subject matter experts, and human factors researchers [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref39">39</xref>]. Our study aligns with these recommendations as well.</p>
      </sec>
      <sec>
        <title>Additional Questions</title>
        <p>The addition of the requirement to present the primary diagnosis and five potential differential diagnoses to the questions increased the ratios of correct answers for the questions on KOA and HOA (Q4a and Q5a in <xref ref-type="boxed-text" rid="box2">Textbox 2</xref>). This might have resulted from the higher frequency of knee and hip osteoarthritis, which was more likely to be selected as the primary diagnosis. Interestingly, CM was included within the potential differential diagnosis in approximately half of the answers to Q2a, reducing the error answer ratio to 52% because the percentage of partially correct answers increased. This would be a useful way to reduce the potential harm due to a misdiagnosis by ChatGPT.</p>
        <p>Q2b, designed for CM with only upper-extremity symptoms, presented the same percentages of correct and incorrect answers as the original CM question (Question 2). However, 80% of those answers showed CTS, which was not diagnosed based on the original question. This may offer further evidence of the large influence of a site-specific factor on the diagnoses provided by ChatGPT.</p>
        <p>The correct answer ratios increased for Q4c and Q5c, which were the questions modified to avoid phrases beginning with “My knee” or “My hip.” These results suggest that it may be better not to begin questions with phrases such as “My knee” when asking ChatGPT for a self-diagnosis.</p>
        <p>As mentioned above, this study found that modifying the way the questions are presented and incorporating additional requirements can affect the accuracy of ChatGPT’s answers. A review of online symptom checkers found that incorporating regional or seasonal data along with personal data improved their accuracy ratio [<xref ref-type="bibr" rid="ref18">18</xref>]. Incorporating such data in the questions posed to ChatGPT for self-diagnosis could lead to more accurate answers. Furthermore, a study recommended that self-diagnostic applications display the implicit diagnosis result with a percentage and present the rationale behind the diagnosis result [<xref ref-type="bibr" rid="ref40">40</xref>]. At this time, adding these suggestions to the question posed to ChatGPT may yield more useful answers.</p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>This study has several limitations. First, despite attempts to create questions that may simulate a patient’s question, they were not patient-derived questions. However, since this was a proof-of-concept study, it was felt that the questions would be sufficient to at least evaluate the accuracy and precision of the algorithm. We hope to address this limitation in a future study since we will have patients submit their own questions. Second, we only tested five orthopedic diseases and thus this study may not represent the multitude of other orthopedic complaints. However, we felt that since these diseases are common, they warranted evaluation. Third, we did not compare our results using ChatGPT with those provided by other chatbots or publicly available data on medical conditions. Other chatbots may present better/worse results, and the easily accessible data do not always offer better support for self-diagnoses compared to that offered by chatbots. We plan to compare the difference between different chatbots in the future. Fourth, the OS and browser software used should have been consistent to eliminate their potential impact on the results. Fifth, it is possible that ChatGPT was trained using the six websites we referenced [<xref ref-type="bibr" rid="ref24">24</xref>-<xref ref-type="bibr" rid="ref29">29</xref>]. However, the significance of our study was not compromised and this was unrelated to the problem of reproducibility. Finally, a GPT-4 version of ChatGPT was released just after we conducted our experiment, which may provide more accurate answers. We plan to use this most recent version in our next study.</p>
      </sec>
      <sec>
        <title>Conclusion</title>
        <p>This is the first study to evaluate ChatGPT’s ability to accurately and precisely self-diagnose five common orthopedic conditions. We found that ChatGPT was inconsistent with respect to self-diagnosis. Of the five diseases, CM had the lowest percent correct ratio, likely due to its multifocal symptoms, which suggests that ChatGPT is incapable of localizing symptoms for such widespread diseases. Given the risk of error and potential harm from misdiagnosis, it is important for any diagnostic tool to direct guidance to seek medical care for confirmation of a disease. A future study with more disease conditions and patient-derived questions can help shed light on the role of NLP as a diagnostic tool.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Screenshot of how to ask a question to ChatGPT (example). CTS: carpal tunnel syndrome.</p>
        <media xlink:href="jmir_v25i1e47621_app1.png" xlink:title="PNG File, 328 KB"/>
      </supplementary-material>
      <supplementary-material id="app2">
        <label>Multimedia Appendix 2</label>
        <p>Full text of the answers to the five questions for 5 days obtained by the study authors.</p>
        <media xlink:href="jmir_v25i1e47621_app2.doc" xlink:title="DOC File, 188 KB"/>
      </supplementary-material>
      <supplementary-material id="app3">
        <label>Multimedia Appendix 3</label>
        <p>Full text of the answers to the nine additional questions for 5 days obtained by the study authors.</p>
        <media xlink:href="jmir_v25i1e47621_app3.doc" xlink:title="DOC File, 419 KB"/>
      </supplementary-material>
      <supplementary-material id="app4">
        <label>Multimedia Appendix 4</label>
        <p>Daily and per-rater Fleiss κ and <italic>P</italic> values in the precision assessment.</p>
        <media xlink:href="jmir_v25i1e47621_app4.xlsx" xlink:title="XLSX File (Microsoft Excel File), 11 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">AI</term>
          <def>
            <p>artificial intelligence</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">CM</term>
          <def>
            <p>cervical myelopathy</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">CTS</term>
          <def>
            <p>carpal tunnel syndrome</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">HOA</term>
          <def>
            <p>hip osteoarthritis</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">KOA</term>
          <def>
            <p>knee osteoarthritis</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">LSS</term>
          <def>
            <p>lumbar spinal stenosis</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">NLP</term>
          <def>
            <p>natural language processing</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">OS</term>
          <def>
            <p>operating system</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>We would like to thank Editage for English-language editing. Neither ChatGPT nor other generative language models were used for the ideation or writing process.</p>
    </ack>
    <notes>
      <sec>
        <title>Data Availability</title>
        <p>All data generated by ChatGPT during this study are included in the Multimedia Appendix files.</p>
      </sec>
    </notes>
    <fn-group>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Aldosari</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Ahmed</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>McDermott</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Stanmore</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>The use of digital health by South Asian communities: scoping review</article-title>
          <source>J Med Internet Res</source>
          <year>2023</year>
          <month>06</month>
          <day>12</day>
          <volume>25</volume>
          <fpage>e40425</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2023/1/e40425/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/40425</pub-id>
          <pub-id pub-id-type="medline">37307045</pub-id>
          <pub-id pub-id-type="pii">v25i1e40425</pub-id>
          <pub-id pub-id-type="pmcid">PMC10337427</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bounsall</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Milne-Ives</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Hall</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Carroll</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Meinert</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Artificial intelligence applications for assessment, monitoring, and management of Parkinson disease symptoms: protocol for a systematic review</article-title>
          <source>JMIR Res Protoc</source>
          <year>2023</year>
          <month>06</month>
          <day>14</day>
          <volume>12</volume>
          <fpage>e46581</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.researchprotocols.org/2023/1/e46581/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/46581</pub-id>
          <pub-id pub-id-type="medline">37314853</pub-id>
          <pub-id pub-id-type="pii">v12i1e46581</pub-id>
          <pub-id pub-id-type="pmcid">PMC10337354</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Choudhury</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Shamszare</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Investigating the impact of user trust on the adoption and use of ChatGPT: survey analysis</article-title>
          <source>J Med Internet Res</source>
          <year>2023</year>
          <month>06</month>
          <day>14</day>
          <volume>25</volume>
          <fpage>e47184</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2023/1/e47184/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/47184</pub-id>
          <pub-id pub-id-type="medline">37314848</pub-id>
          <pub-id pub-id-type="pii">v25i1e47184</pub-id>
          <pub-id pub-id-type="pmcid">PMC10337387</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Garvey</surname>
              <given-names>KV</given-names>
            </name>
            <name name-style="western">
              <surname>Thomas Craig</surname>
              <given-names>KJ</given-names>
            </name>
            <name name-style="western">
              <surname>Russell</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Novak</surname>
              <given-names>LL</given-names>
            </name>
            <name name-style="western">
              <surname>Moore</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Miller</surname>
              <given-names>BM</given-names>
            </name>
          </person-group>
          <article-title>Considering clinician competencies for the implementation of artificial intelligence-based tools in health care: findings from a scoping review</article-title>
          <source>JMIR Med Inform</source>
          <year>2022</year>
          <month>11</month>
          <day>16</day>
          <volume>10</volume>
          <issue>11</issue>
          <fpage>e37478</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medinform.jmir.org/2022/11/e37478/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/37478</pub-id>
          <pub-id pub-id-type="medline">36318697</pub-id>
          <pub-id pub-id-type="pii">v10i11e37478</pub-id>
          <pub-id pub-id-type="pmcid">PMC9713618</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sekandi</surname>
              <given-names>JN</given-names>
            </name>
            <name name-style="western">
              <surname>Shi</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Kaggwa</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Mwebaze</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Application of artificial intelligence to the monitoring of medication adherence for tuberculosis treatment in Africa: algorithm development and validation</article-title>
          <source>JMIR AI</source>
          <year>2023</year>
          <month>2</month>
          <day>23</day>
          <volume>2</volume>
          <fpage>e40167</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://ai.jmir.org/2023/1/e40167"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/40167</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Murphy</surname>
              <given-names>RM</given-names>
            </name>
            <name name-style="western">
              <surname>Klopotowska</surname>
              <given-names>JE</given-names>
            </name>
            <name name-style="western">
              <surname>de Keizer</surname>
              <given-names>NF</given-names>
            </name>
            <name name-style="western">
              <surname>Jager</surname>
              <given-names>KJ</given-names>
            </name>
            <name name-style="western">
              <surname>Leopold</surname>
              <given-names>JH</given-names>
            </name>
            <name name-style="western">
              <surname>Dongelmans</surname>
              <given-names>DA</given-names>
            </name>
            <name name-style="western">
              <surname>Abu-Hanna</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Schut</surname>
              <given-names>MC</given-names>
            </name>
          </person-group>
          <article-title>Adverse drug event detection using natural language processing: a scoping review of supervised learning methods</article-title>
          <source>PLoS One</source>
          <year>2023</year>
          <month>1</month>
          <day>3</day>
          <volume>18</volume>
          <issue>1</issue>
          <fpage>e0279842</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pone.0279842"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0279842</pub-id>
          <pub-id pub-id-type="medline">36595517</pub-id>
          <pub-id pub-id-type="pii">PONE-D-22-08110</pub-id>
          <pub-id pub-id-type="pmcid">PMC9810201</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Spinelli</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Carrano</surname>
              <given-names>FM</given-names>
            </name>
            <name name-style="western">
              <surname>Laino</surname>
              <given-names>ME</given-names>
            </name>
            <name name-style="western">
              <surname>Andreozzi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Koleth</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Hassan</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Repici</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Chand</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Savevski</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Pellino</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Artificial intelligence in colorectal surgery: an AI-powered systematic review</article-title>
          <source>Tech Coloproctol</source>
          <year>2023</year>
          <month>08</month>
          <day>21</day>
          <volume>27</volume>
          <issue>8</issue>
          <fpage>615</fpage>
          <lpage>629</lpage>
          <pub-id pub-id-type="doi">10.1007/s10151-023-02772-8</pub-id>
          <pub-id pub-id-type="medline">36805890</pub-id>
          <pub-id pub-id-type="pii">10.1007/s10151-023-02772-8</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>van de Burgt</surname>
              <given-names>BWM</given-names>
            </name>
            <name name-style="western">
              <surname>Wasylewicz</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Dullemond</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Grouls</surname>
              <given-names>RJE</given-names>
            </name>
            <name name-style="western">
              <surname>Egberts</surname>
              <given-names>TCG</given-names>
            </name>
            <name name-style="western">
              <surname>Bouwman</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Korsten</surname>
              <given-names>EMM</given-names>
            </name>
          </person-group>
          <article-title>Combining text mining with clinical decision support in clinical practice: a scoping review</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2023</year>
          <month>02</month>
          <day>16</day>
          <volume>30</volume>
          <issue>3</issue>
          <fpage>588</fpage>
          <lpage>603</lpage>
          <pub-id pub-id-type="doi">10.1093/jamia/ocac240</pub-id>
          <pub-id pub-id-type="medline">36512578</pub-id>
          <pub-id pub-id-type="pii">6895525</pub-id>
          <pub-id pub-id-type="pmcid">PMC9933076</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Fu</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Wen</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Ruan</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>He</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Moon</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Mai</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Riaz</surname>
              <given-names>IB</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Warner</surname>
              <given-names>JL</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Assessment of electronic health record for cancer research and patient care through a scoping review of cancer natural language processing</article-title>
          <source>JCO Clin Cancer Inform</source>
          <year>2022</year>
          <month>07</month>
          <volume>6</volume>
          <fpage>e2200006</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://ascopubs.org/doi/10.1200/CCI.22.00006?url_ver=Z39.88-2003&#38;rfr_id=ori:rid:crossref.org&#38;rfr_dat=cr_pub%20%200pubmed"/>
          </comment>
          <pub-id pub-id-type="doi">10.1200/CCI.22.00006</pub-id>
          <pub-id pub-id-type="medline">35917480</pub-id>
          <pub-id pub-id-type="pmcid">PMC9470142</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Esteva</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Robicquet</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Ramsundar</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Kuleshov</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>DePristo</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Chou</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Cui</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Corrado</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Thrun</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Dean</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>A guide to deep learning in healthcare</article-title>
          <source>Nat Med</source>
          <year>2019</year>
          <month>01</month>
          <day>7</day>
          <volume>25</volume>
          <issue>1</issue>
          <fpage>24</fpage>
          <lpage>29</lpage>
          <pub-id pub-id-type="doi">10.1038/s41591-018-0316-z</pub-id>
          <pub-id pub-id-type="medline">30617335</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41591-018-0316-z</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Shen</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>Q</given-names>
            </name>
          </person-group>
          <article-title>Artificial intelligence in lung cancer diagnosis and prognosis: current application and future perspective</article-title>
          <source>Semin Cancer Biol</source>
          <year>2023</year>
          <month>02</month>
          <volume>89</volume>
          <fpage>30</fpage>
          <lpage>37</lpage>
          <pub-id pub-id-type="doi">10.1016/j.semcancer.2023.01.006</pub-id>
          <pub-id pub-id-type="medline">36682439</pub-id>
          <pub-id pub-id-type="pii">S1044-579X(23)00006-8</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Li</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Weng</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Z</given-names>
            </name>
          </person-group>
          <article-title>Natural language processing applications for computer-aided diagnosis in oncology</article-title>
          <source>Diagnostics</source>
          <year>2023</year>
          <month>01</month>
          <day>12</day>
          <volume>13</volume>
          <issue>2</issue>
          <fpage>286</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.mdpi.com/resolver?pii=diagnostics13020286"/>
          </comment>
          <pub-id pub-id-type="doi">10.3390/diagnostics13020286</pub-id>
          <pub-id pub-id-type="medline">36673096</pub-id>
          <pub-id pub-id-type="pii">diagnostics13020286</pub-id>
          <pub-id pub-id-type="pmcid">PMC9857980</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nath</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Marie</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Ellershaw</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Korot</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Keane</surname>
              <given-names>PA</given-names>
            </name>
          </person-group>
          <article-title>New meaning for NLP: the trials and tribulations of natural language processing with GPT-3 in ophthalmology</article-title>
          <source>Br J Ophthalmol</source>
          <year>2022</year>
          <month>07</month>
          <day>06</day>
          <volume>106</volume>
          <issue>7</issue>
          <fpage>889</fpage>
          <lpage>892</lpage>
          <pub-id pub-id-type="doi">10.1136/bjophthalmol-2022-321141</pub-id>
          <pub-id pub-id-type="medline">35523534</pub-id>
          <pub-id pub-id-type="pii">bjophthalmol-2022-321141</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ruby</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <source>DemandSage</source>
          <year>2023</year>
          <access-date>2023-02-23</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.demandsage.com/chatgpt-statistics/#:~:text=13%20million%20individual%20active%20users,than%2057%20million%20monthly%20users">https://www.demandsage.com/chatgpt-statistics/#:~:text=13%20million%20individual%20active%20users,than%2057%20million%20monthly%20users</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fan</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Chao</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Tian</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Utilization of self-diagnosis health chatbots in real-world settings: case study</article-title>
          <source>J Med Internet Res</source>
          <year>2021</year>
          <month>01</month>
          <day>06</day>
          <volume>23</volume>
          <issue>1</issue>
          <fpage>e19928</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2021/1/e19928/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/19928</pub-id>
          <pub-id pub-id-type="medline">33404508</pub-id>
          <pub-id pub-id-type="pii">v23i1e19928</pub-id>
          <pub-id pub-id-type="pmcid">PMC7817366</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Aboueid</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>RH</given-names>
            </name>
            <name name-style="western">
              <surname>Desta</surname>
              <given-names>BN</given-names>
            </name>
            <name name-style="western">
              <surname>Chaurasia</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Ebrahim</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>The use of artificially intelligent self-diagnosing digital platforms by the general public: scoping review</article-title>
          <source>JMIR Med Inform</source>
          <year>2019</year>
          <month>05</month>
          <day>01</day>
          <volume>7</volume>
          <issue>2</issue>
          <fpage>e13445</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medinform.jmir.org/2019/2/e13445/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/13445</pub-id>
          <pub-id pub-id-type="medline">31042151</pub-id>
          <pub-id pub-id-type="pii">v7i2e13445</pub-id>
          <pub-id pub-id-type="pmcid">PMC6658267</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lupton</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Jutel</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>'It's like having a physician in your pocket!' A critical analysis of self-diagnosis smartphone apps</article-title>
          <source>Soc Sci Med</source>
          <year>2015</year>
          <month>05</month>
          <volume>133</volume>
          <fpage>128</fpage>
          <lpage>135</lpage>
          <pub-id pub-id-type="doi">10.1016/j.socscimed.2015.04.004</pub-id>
          <pub-id pub-id-type="medline">25864149</pub-id>
          <pub-id pub-id-type="pii">S0277-9536(15)00224-5</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Riboli-Sasco</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>El-Osta</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Alaa</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Webber</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Karki</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>El Asmar</surname>
              <given-names>ML</given-names>
            </name>
            <name name-style="western">
              <surname>Purohit</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Painter</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Hayhoe</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Triage and diagnostic accuracy of online symptom checkers: systematic review</article-title>
          <source>J Med Internet Res</source>
          <year>2023</year>
          <month>06</month>
          <day>02</day>
          <volume>25</volume>
          <fpage>e43803</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2023/1/e43803/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/43803</pub-id>
          <pub-id pub-id-type="medline">37266983</pub-id>
          <pub-id pub-id-type="pii">v25i1e43803</pub-id>
          <pub-id pub-id-type="pmcid">PMC10276326</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ryan</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Wilson</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Internet healthcare: do self-diagnosis sites do more harm than good?</article-title>
          <source>Expert Opin Drug Saf</source>
          <year>2008</year>
          <month>05</month>
          <day>08</day>
          <volume>7</volume>
          <issue>3</issue>
          <fpage>227</fpage>
          <lpage>229</lpage>
          <pub-id pub-id-type="doi">10.1517/14740338.7.3.227</pub-id>
          <pub-id pub-id-type="medline">18462181</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Semigran</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Linder</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Gidengil</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Mehrotra</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Evaluation of symptom checkers for self diagnosis and triage: audit study</article-title>
          <source>BMJ</source>
          <year>2015</year>
          <month>07</month>
          <day>08</day>
          <volume>351</volume>
          <fpage>h3480</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.bmj.com/lookup/pmidlookup?view=long&#38;pmid=26157077"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/bmj.h3480</pub-id>
          <pub-id pub-id-type="medline">26157077</pub-id>
          <pub-id pub-id-type="pmcid">PMC4496786</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hirosawa</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Harada</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Yokose</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Sakamoto</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Kawamura</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Shimizu</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Diagnostic accuracy of differential-diagnosis lists generated by generative pretrained transformer 3 chatbot for clinical vignettes with common chief complaints: a pilot study</article-title>
          <source>Int J Environ Res Public Health</source>
          <year>2023</year>
          <month>02</month>
          <day>15</day>
          <volume>20</volume>
          <issue>4</issue>
          <fpage>3378</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.mdpi.com/resolver?pii=ijerph20043378"/>
          </comment>
          <pub-id pub-id-type="doi">10.3390/ijerph20043378</pub-id>
          <pub-id pub-id-type="medline">36834073</pub-id>
          <pub-id pub-id-type="pii">ijerph20043378</pub-id>
          <pub-id pub-id-type="pmcid">PMC9967747</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Johnson</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Goodman</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Patrinely</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Stone</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Zimmerman</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Donald</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Berkowitz</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Finn</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Jahangir</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Scoville</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Reese</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Friedman</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Bastarache</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>van der Heijden</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wright</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Carter</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Alexander</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Choe</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chastain</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Zic</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Horst</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Turker</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Agarwal</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Osmundson</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Idrees</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Kiernan</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Padmanabhan</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Bailey</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Schlegel</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Chambless</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Gibson</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Osterman</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Wheless</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Assessing the accuracy and reliability of AI-generated medical responses: an evaluation of the Chat-GPT model</article-title>
          <source>Res Sq</source>
          <year>2023</year>
          <month>02</month>
          <day>28</day>
          <fpage>rs.3.rs-2566942</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/36909565"/>
          </comment>
          <pub-id pub-id-type="doi">10.21203/rs.3.rs-2566942/v1</pub-id>
          <pub-id pub-id-type="medline">36909565</pub-id>
          <pub-id pub-id-type="pii">rs.3.rs-2566942</pub-id>
          <pub-id pub-id-type="pmcid">PMC10002821</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>St Sauver</surname>
              <given-names>JL</given-names>
            </name>
            <name name-style="western">
              <surname>Warner</surname>
              <given-names>DO</given-names>
            </name>
            <name name-style="western">
              <surname>Yawn</surname>
              <given-names>BP</given-names>
            </name>
            <name name-style="western">
              <surname>Jacobson</surname>
              <given-names>DJ</given-names>
            </name>
            <name name-style="western">
              <surname>McGree</surname>
              <given-names>ME</given-names>
            </name>
            <name name-style="western">
              <surname>Pankratz</surname>
              <given-names>JJ</given-names>
            </name>
            <name name-style="western">
              <surname>Melton</surname>
              <given-names>LJ</given-names>
            </name>
            <name name-style="western">
              <surname>Roger</surname>
              <given-names>VL</given-names>
            </name>
            <name name-style="western">
              <surname>Ebbert</surname>
              <given-names>JO</given-names>
            </name>
            <name name-style="western">
              <surname>Rocca</surname>
              <given-names>WA</given-names>
            </name>
          </person-group>
          <article-title>Why patients visit their doctors: assessing the most prevalent conditions in a defined American population</article-title>
          <source>Mayo Clin Proc</source>
          <year>2013</year>
          <month>01</month>
          <volume>88</volume>
          <issue>1</issue>
          <fpage>56</fpage>
          <lpage>67</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/23274019"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.mayocp.2012.08.020</pub-id>
          <pub-id pub-id-type="medline">23274019</pub-id>
          <pub-id pub-id-type="pii">S0025-6196(12)01036-1</pub-id>
          <pub-id pub-id-type="pmcid">PMC3564521</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="web">
          <article-title>Carpal tunnel syndrome</article-title>
          <source>Mayo Clinic</source>
          <access-date>2023-02-23</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.mayoclinic.org/diseases-conditions/carpal-tunnel-syndrome/symptoms-causes/syc-20355603">https://www.mayoclinic.org/diseases-conditions/carpal-tunnel-syndrome/symptoms-causes/syc-20355603</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="web">
          <article-title>Spinal stenosis</article-title>
          <source>Mayo Clinic</source>
          <access-date>2023-02-23</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.mayoclinic.org/diseases-conditions/spinal-stenosis/symptoms-causes/syc-20352961">https://www.mayoclinic.org/diseases-conditions/spinal-stenosis/symptoms-causes/syc-20352961</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="web">
          <article-title>Osteoarthritis of the knee</article-title>
          <source>Cleveland Clinic</source>
          <access-date>2023-02-23</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://my.clevelandclinic.org/health/diseases/21750-osteoarthritis-knee">https://my.clevelandclinic.org/health/diseases/21750-osteoarthritis-knee</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="web">
          <article-title>Myelopathy</article-title>
          <source>Cleveland Clinic</source>
          <access-date>2023-02-23</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://my.clevelandclinic.org/health/diseases/21966-myelopathy">https://my.clevelandclinic.org/health/diseases/21966-myelopathy</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="web">
          <article-title>Cervical myelopathy</article-title>
          <source>Johns Hopkins Medicine</source>
          <access-date>2023-02-23</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.hopkinsmedicine.org/health/conditions-and-diseases/cervical-myelopathy">https://www.hopkinsmedicine.org/health/conditions-and-diseases/cervical-myelopathy</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="web">
          <article-title>Hip arthritis</article-title>
          <source>Johns Hopkins Medicine</source>
          <access-date>2023-02-23</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.hopkinsmedicine.org/health/conditions-and-diseases/hip-arthritis">https://www.hopkinsmedicine.org/health/conditions-and-diseases/hip-arthritis</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Saito</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Sozu</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Hamada</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Yoshimura</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>Effective number of subjects and number of raters for inter-rater reliability studies</article-title>
          <source>Stat Med</source>
          <year>2006</year>
          <month>05</month>
          <day>15</day>
          <volume>25</volume>
          <issue>9</issue>
          <fpage>1547</fpage>
          <lpage>1560</lpage>
          <pub-id pub-id-type="doi">10.1002/sim.2294</pub-id>
          <pub-id pub-id-type="medline">16143966</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Light</surname>
              <given-names>RJ</given-names>
            </name>
          </person-group>
          <article-title>Measures of response agreement for qualitative data: some generalizations and alternatives</article-title>
          <source>Psychol Bull</source>
          <year>1971</year>
          <month>11</month>
          <volume>76</volume>
          <issue>5</issue>
          <fpage>365</fpage>
          <lpage>377</lpage>
          <pub-id pub-id-type="doi">10.1037/h0031643</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Landis</surname>
              <given-names>JR</given-names>
            </name>
            <name name-style="western">
              <surname>Koch</surname>
              <given-names>GG</given-names>
            </name>
          </person-group>
          <article-title>The measurement of observer agreement for categorical data</article-title>
          <source>Biometrics</source>
          <year>1977</year>
          <month>03</month>
          <volume>33</volume>
          <issue>1</issue>
          <fpage>159</fpage>
          <lpage>174</lpage>
          <pub-id pub-id-type="doi">10.2307/2529310</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Chia</surname>
              <given-names>YK</given-names>
            </name>
            <name name-style="western">
              <surname>Ding</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Bing</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Can ChatGPT-like generative models guarantee factual accuracy? On the mistakes of new generation search engines</article-title>
          <source>arXiv</source>
          <year>2023</year>
          <month>03</month>
          <day>03</day>
          <access-date>2023-08-19</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2304.11076">https://arxiv.org/abs/2304.11076</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tan</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Min</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Qi</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Evaluation of ChatGPT as a question answering system for answering complex questions</article-title>
          <source>arXiv</source>
          <year>2023</year>
          <month>03</month>
          <day>14</day>
          <access-date>2023-08-19</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2303.07992">https://arxiv.org/abs/2303.07992</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Else</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Abstracts written by ChatGPT fool scientists</article-title>
          <source>Nature</source>
          <year>2023</year>
          <month>01</month>
          <day>12</day>
          <volume>613</volume>
          <issue>7944</issue>
          <fpage>423</fpage>
          <lpage>423</lpage>
          <pub-id pub-id-type="doi">10.1038/d41586-023-00056-7</pub-id>
          <pub-id pub-id-type="medline">36635510</pub-id>
          <pub-id pub-id-type="pii">10.1038/d41586-023-00056-7</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kitamura</surname>
              <given-names>FC</given-names>
            </name>
          </person-group>
          <article-title>ChatGPT is shaping the future of medical writing but still requires human judgment</article-title>
          <source>Radiology</source>
          <year>2023</year>
          <month>04</month>
          <day>01</day>
          <volume>307</volume>
          <issue>2</issue>
          <fpage>e230171</fpage>
          <pub-id pub-id-type="doi">10.1148/radiol.230171</pub-id>
          <pub-id pub-id-type="medline">36728749</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Májovský</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Černý</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Kasal</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Komarc</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Netuka</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Artificial intelligence can generate fraudulent but authentic-looking scientific medical articles: Pandora's box has been opened</article-title>
          <source>J Med Internet Res</source>
          <year>2023</year>
          <month>05</month>
          <day>31</day>
          <volume>25</volume>
          <fpage>e46924</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2023/1/e46924/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/46924</pub-id>
          <pub-id pub-id-type="medline">37256685</pub-id>
          <pub-id pub-id-type="pii">v25i1e46924</pub-id>
          <pub-id pub-id-type="pmcid">PMC10267787</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ciaccio</surname>
              <given-names>EJ</given-names>
            </name>
          </person-group>
          <article-title>Use of artificial intelligence in scientific paper writing</article-title>
          <source>Inform Med Unlocked</source>
          <year>2023</year>
          <month>04</month>
          <fpage>101253</fpage>
          <pub-id pub-id-type="doi">10.1016/j.imu.2023.101253</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Shahsavar</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Choudhury</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>User intentions to use ChatGPT for self-diagnosis and health-related purposes: cross-sectional survey study</article-title>
          <source>JMIR Hum Factors</source>
          <year>2023</year>
          <month>05</month>
          <day>17</day>
          <volume>10</volume>
          <fpage>e47564</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://humanfactors.jmir.org/2023/1/e47564/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/47564</pub-id>
          <pub-id pub-id-type="medline">37195756</pub-id>
          <pub-id pub-id-type="pii">v10i1e47564</pub-id>
          <pub-id pub-id-type="pmcid">PMC10233444</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Cheung</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Sosa</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Koh</surname>
              <given-names>DCI</given-names>
            </name>
          </person-group>
          <article-title>Design self-diagnosis applications for non-patients</article-title>
          <year>2015</year>
          <conf-name>33rd Annual ACM Conference Extended Abstracts on Human Factors in Computing Systems</conf-name>
          <conf-date>April 18-23, 2015</conf-date>
          <conf-loc>Seoul, Korea</conf-loc>
          <pub-id pub-id-type="doi">10.1145/2702613.2732865</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
