<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="letter" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMIR</journal-id>
      <journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id>
      <journal-title>Journal of Medical Internet Research</journal-title>
      <issn pub-type="epub">1438-8871</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v25i1e49240</article-id>
      <article-id pub-id-type="pmid">37695668</article-id>
      <article-id pub-id-type="doi">10.2196/49240</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Research Letter</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Research Letter</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Clinical Accuracy of Large Language Models and Google Search Responses to Postpartum Depression Questions: Cross-Sectional Study</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Leung</surname>
            <given-names>Tiffany</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Santosa</surname>
            <given-names>Anindita</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Whitehead</surname>
            <given-names>David</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Sezgin</surname>
            <given-names>Emre</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Nationwide Children's Hospital</institution>
            <addr-line>700 Children's Dr</addr-line>
            <addr-line>Columbus, OH, 43205</addr-line>
            <country>United States</country>
            <phone>1 614 722 3179</phone>
            <email>emre.sezgin@nationwidechildrens.org</email>
          </address>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-8798-9605</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Chekeni</surname>
            <given-names>Faraaz</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0004-5472-3906</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Lee</surname>
            <given-names>Jennifer</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-0849-1006</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Keim</surname>
            <given-names>Sarah</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff2" ref-type="aff">2</xref>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-3490-3649</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Nationwide Children's Hospital</institution>
        <addr-line>Columbus, OH</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>College of Medicine</institution>
        <institution>The Ohio State University</institution>
        <addr-line>Columbus, OH</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>College of Public Health</institution>
        <institution>The Ohio State University</institution>
        <addr-line>Columbus, OH</addr-line>
        <country>United States</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Emre Sezgin <email>emre.sezgin@nationwidechildrens.org</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <year>2023</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>11</day>
        <month>9</month>
        <year>2023</year>
      </pub-date>
      <volume>25</volume>
      <elocation-id>e49240</elocation-id>
      <history>
        <date date-type="received">
          <day>22</day>
          <month>5</month>
          <year>2023</year>
        </date>
        <date date-type="rev-request">
          <day>16</day>
          <month>7</month>
          <year>2023</year>
        </date>
        <date date-type="rev-recd">
          <day>20</day>
          <month>7</month>
          <year>2023</year>
        </date>
        <date date-type="accepted">
          <day>30</day>
          <month>8</month>
          <year>2023</year>
        </date>
      </history>
      <copyright-statement>©Emre Sezgin, Faraaz Chekeni, Jennifer Lee, Sarah Keim. Originally published in the Journal of Medical Internet Research (https://www.jmir.org), 11.09.2023.</copyright-statement>
      <copyright-year>2023</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research, is properly cited. The complete bibliographic information, a link to the original publication on https://www.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://www.jmir.org/2023/1/e49240" xlink:type="simple"/>
      <kwd-group>
        <kwd>mental health</kwd>
        <kwd>postpartum depression</kwd>
        <kwd>health information seeking</kwd>
        <kwd>large language model</kwd>
        <kwd>GPT</kwd>
        <kwd>LaMDA</kwd>
        <kwd>Google</kwd>
        <kwd>ChatGPT</kwd>
        <kwd>artificial intelligence</kwd>
        <kwd>natural language processing</kwd>
        <kwd>generative AI</kwd>
        <kwd>depression</kwd>
        <kwd>cross-sectional study</kwd>
        <kwd>clinical accuracy</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>Postpartum depression (PPD) affects about 1 in 8 women in the months after delivery [<xref ref-type="bibr" rid="ref1">1</xref>], and most of the affected individuals do not receive help, primarily due to insufficient screening and a lack of awareness about the condition. As large language model (LLM)–supported applications are becoming an integral part of web-based information-seeking behavior, it is necessary to assess the capability and validity of these applications in addressing prevalent mental health conditions [<xref ref-type="bibr" rid="ref2">2</xref>]. In this study, we assessed the quality of LLM-generated responses to frequently asked PPD questions based on clinical accuracy (a contextually appropriate response that reflects current medical knowledge).</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <p>We used 2 publicly accessible LLMs, GPT-4 (using ChatGPT) [<xref ref-type="bibr" rid="ref3">3</xref>] and LaMDA (using Bard) [<xref ref-type="bibr" rid="ref4">4</xref>], and the Google Search engine. On April 3, 2023, we prompted each model and queried Google with 14 PPD-related patient-focused frequently asked questions sourced from the American College of Obstetricians and Gynecologists (ACOG; <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>) [<xref ref-type="bibr" rid="ref5">5</xref>]. ChatGPT and Bard were prompted with each question in a new single session without prior conversation. Google Search results were not standardized, and search results were displayed in 3 different formats: an information card, curated content (a snippet of text at the top), and top search results (list of links with brief information snippets including sponsored content). We analyzed only Google interface-based feedback to be consistent (the first response without link navigation).</p>
      <p>Two board-certified physicians (author JL is board certified in pediatrics and pediatric gastroenterology and author FC is board certified in pediatrics) compared the LLM responses and Google Search results to the ACOG FAQ responses and rated the quality of responses using a GRADE (Grading of Recommendations Assessment, Development and Evaluation)-informed scale [<xref ref-type="bibr" rid="ref6">6</xref>]. We calculated Cohen κ coefficient to measure interrater reliability. We tested the normality (Shapiro-Wilk test) and homoscedasticity (Levene test) of the rater data, followed by the Kruskal-Wallis test to compare the differences in the quality rating among the 3 groups. The pairs of groups were investigated for significant differences by post hoc Dunn test with Bonferroni correction (for multiple comparisons). Analyses used R software (v4.2.1; R Foundation for Statistical Computing) [<xref ref-type="bibr" rid="ref7">7</xref>].</p>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <p>The quality of ChatGPT responses differed from that of the others (mean 3.93, SD 0.27; <xref ref-type="table" rid="table1">Table 1</xref>). A statistically significant difference in the distribution of scores among the categories was found (<italic>χ</italic><sup>2</sup><sub>2</sub>=12.2; <italic>P</italic>=.002; <xref ref-type="table" rid="table2">Table 2</xref>). ChatGPT demonstrated generally higher quality (more clinically accurate) responses compared to Bard (<italic>Z</italic>=2.143; adjusted <italic>P</italic>=.048) and Google Search (<italic>Z</italic>=3.464; adjusted <italic>P</italic>&#60;.001). There was no difference in the quality of responses between Bard and Google Search (<italic>Z</italic>=1.320; adjusted <italic>P</italic>=.28).</p>
      <p>Raters showed perfect agreement for ChatGPT (κ=1, 95% CI 0.85-1.15) and near-perfect agreement for Bard and Google Search (κ=0.92, 95% CI 0.71-1.13). Data were not normally distributed (<italic>P</italic>&#60;.05) and nonhomoscedastic (<italic>F</italic><sub>2</sub>=4.153; <italic>P</italic>=.02) for each category (ChatGPT, Bard, and Google Search).</p>
      <table-wrap position="float" id="table1">
        <label>Table 1</label>
        <caption>
          <p>Average quality ratings for ChatGPT, Bard, and Google Search responses to American College of Obstetricians and Gynecologists (ACOG) questions [<xref ref-type="bibr" rid="ref5">5</xref>].</p>
        </caption>
        <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
          <col width="600"/>
          <col width="120"/>
          <col width="120"/>
          <col width="160"/>
          <thead>
            <tr valign="bottom">
              <td>ACOG postpartum depression frequently asked questions</td>
              <td colspan="3">Average quality ratings<sup>a</sup></td>
            </tr>
            <tr valign="bottom">
              <td>
                <break/>
              </td>
              <td>ChatGPT</td>
              <td>Bard</td>
              <td>Google Search</td>
            </tr>
          </thead>
          <tbody>
            <tr valign="top">
              <td>What are baby blues?</td>
              <td>4</td>
              <td>4</td>
              <td>3</td>
            </tr>
            <tr valign="top">
              <td>Can antidepressants cause side effects?</td>
              <td>4</td>
              <td>0</td>
              <td>3</td>
            </tr>
            <tr valign="top">
              <td>How is postpartum depression treated?</td>
              <td>4</td>
              <td>4</td>
              <td>4</td>
            </tr>
            <tr valign="top">
              <td>How long do the baby blues usually last?</td>
              <td>4</td>
              <td>4</td>
              <td>1</td>
            </tr>
            <tr valign="top">
              <td>If I think I have postpartum depression, when should I see my health care professional?</td>
              <td>4</td>
              <td>4</td>
              <td>1</td>
            </tr>
            <tr valign="top">
              <td>What are antidepressants?</td>
              <td>4</td>
              <td>0</td>
              <td>3.5</td>
            </tr>
            <tr valign="top">
              <td>Can antidepressants be passed to my baby through my breast milk?</td>
              <td>4</td>
              <td>0</td>
              <td>3</td>
            </tr>
            <tr valign="top">
              <td>What are the types of talk therapy?</td>
              <td>4</td>
              <td>4</td>
              <td>3</td>
            </tr>
            <tr valign="top">
              <td>What can be done to help prevent postpartum depression in women with a history of depression?</td>
              <td>3</td>
              <td>4</td>
              <td>1</td>
            </tr>
            <tr valign="top">
              <td>What causes postpartum depression?</td>
              <td>4</td>
              <td>0</td>
              <td>1</td>
            </tr>
            <tr valign="top">
              <td>What happens in talk therapy?</td>
              <td>4</td>
              <td>4</td>
              <td>4</td>
            </tr>
            <tr valign="top">
              <td>What is postpartum depression?</td>
              <td>4</td>
              <td>4</td>
              <td>4</td>
            </tr>
            <tr valign="top">
              <td>What support is available to help me cope with postpartum depression?</td>
              <td>4</td>
              <td>3</td>
              <td>1</td>
            </tr>
            <tr valign="top">
              <td>When does postpartum depression occur?</td>
              <td>4</td>
              <td>3.5</td>
              <td>1</td>
            </tr>
            <tr valign="top">
              <td>Mean (SD)</td>
              <td>3.93 (0.27)</td>
              <td>2.75 (1.83)</td>
              <td>2.39 (1.30)</td>
            </tr>
            <tr valign="top">
              <td>Median (IQR)</td>
              <td>4 (4-4)</td>
              <td>4 (0-4)</td>
              <td>3 (1-4)</td>
            </tr>
            <tr valign="top">
              <td>Mode</td>
              <td>4</td>
              <td>4</td>
              <td>1</td>
            </tr>
            <tr valign="top">
              <td>Minimum-maximum</td>
              <td>3-4</td>
              <td>0-4</td>
              <td>1-4</td>
            </tr>
          </tbody>
        </table>
        <table-wrap-foot>
          <fn id="table1fn1">
            <p><sup>a</sup>GRADE (Grading of Recommendations Assessment, Development and Evaluation)-informed quality assessment scale [<xref ref-type="bibr" rid="ref6">6</xref>]: 0=no response (the system refused to provide any information), 1=inaccurate response (the system response does not reflect any facts relevant to the corresponding question), 2=clinically inaccurate response (the system response includes facts about the corresponding question but is not clinically relevant), 3=partially clinically accurate response (the system response is accurate and clinically relevant, yet it introduces some risks in terms of misinterpretations and misunderstanding), 4=mostly clinically accurate response (the system response is accurate and clinically relevant, and risk is minimal for misinterpretations and misunderstanding).</p>
          </fn>
        </table-wrap-foot>
      </table-wrap>
      <table-wrap position="float" id="table2">
        <label>Table 2</label>
        <caption>
          <p>Results of nonparametric test to identify significant differences between categories (Kruskal-Wallis) and post hoc pairwise comparison to determine differing categories (Dunn test).</p>
        </caption>
        <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
          <col width="30"/>
          <col width="500"/>
          <col width="160"/>
          <col width="310"/>
          <thead>
            <tr valign="top">
              <td colspan="2">Test</td>
              <td>Value</td>
              <td>Adjusted <italic>P</italic> value</td>
            </tr>
          </thead>
          <tbody>
            <tr valign="top">
              <td colspan="4">
                <bold>Kruskal-Wallis</bold>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Chi-square (<italic>df</italic>)</td>
              <td>12.2 (2)</td>
              <td>.002<sup>a</sup></td>
            </tr>
            <tr valign="top">
              <td colspan="4">
                <bold>Dunn Test</bold>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>ChatGPT vs Bard, <italic>Z</italic> value</td>
              <td>2.143</td>
              <td>.048<sup>a</sup></td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>ChatGPT vs Google Search, <italic>Z</italic> value</td>
              <td>3.464</td>
              <td>&#60;.001<sup>b</sup></td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Bard vs Google Search, <italic>Z</italic> value</td>
              <td>1.320</td>
              <td>.28</td>
            </tr>
          </tbody>
        </table>
        <table-wrap-foot>
          <fn id="table2fn1">
            <p><sup>a</sup><italic>P</italic>&#60;.05.</p>
          </fn>
          <fn id="table2fn2">
            <p><sup>b</sup><italic>P</italic>&#60;.001.</p>
          </fn>
        </table-wrap-foot>
      </table-wrap>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <p>This study expands an earlier investigation on chatbot advice for PPD [<xref ref-type="bibr" rid="ref8">8</xref>], showing that LLMs can provide clinically accurate responses to questions regarding PPD. ChatGPT provides higher-quality responses based on concordance with answers provided in the ACOG FAQ. The quality of Bard responses was high when provided, but its overall score was impacted by no-response answers (which were mostly factual in nature rather than seeking medical advice, eg, “what are antidepressants?”). These responses received the lowest quality score in our rating. Almost none of the responses by Bard and ChatGPT provided a source for their information (only one response included a source). However, many responses recommended consulting a health care provider or mental health professional in some capacity. Google Search results were rated as lower-than-average quality compared to Bard and ChatGPT.</p>
      <p>Overall, LLMs showed promise in terms of providing clinically accurate or better-quality responses than Google Search results. This finding is consistent with the prior investigation on the appropriateness of LLM-based medical advice [<xref ref-type="bibr" rid="ref9">9</xref>]. Our findings should be interpreted carefully considering the following limitations. To start, none of these technologies are built for medical purposes. We included a limited number of standard questions (14 ACOG questions) analyzed within a limited scope (one question per category; no personas, eg, “act like a doctor”; no prompt engineering for exploring different contexts or settings). Future work is needed for a more comprehensive investigation (eg, measuring acceptability and empathy with stakeholders) as well as to develop clinical guidance (frameworks in close collaboration among clinicians, researchers, and developers) to inform the implementation and evaluation of such technologies, ensuring their ability to address PPD-related questions accurately, ethically, and safely [<xref ref-type="bibr" rid="ref10">10</xref>].</p>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Responses to postpartum depression frequently asked questions.</p>
        <media xlink:href="jmir_v25i1e49240_app1.xlsx" xlink:title="XLSX File  (Microsoft Excel File), 26 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">ACOG</term>
          <def>
            <p>American College of Obstetricians and Gynecologists</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">GRADE</term>
          <def>
            <p>Grading of Recommendations Assessment, Development and Evaluation</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">LLM</term>
          <def>
            <p>large language model</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">PPD</term>
          <def>
            <p>postpartum depression</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <notes>
      <sec>
        <title>Data Availability</title>
        <p>All data generated or analyzed during this study are included in this published article (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>).</p>
      </sec>
    </notes>
    <fn-group>
      <fn fn-type="con">
        <p>ES led the conceptualization, method development, data curation, and drafting of the manuscript. FC and JL performed the formal analysis. All authors participated in the investigation and validation processes. The project was supervised by ES and SK. The manuscript was reviewed and edited by all authors, who also approved its final version.</p>
      </fn>
      <fn fn-type="conflict">
        <p>FC owned shares of Google (GOOGL) during the study period.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="web">
          <article-title>Depression during and after pregnancy</article-title>
          <source>Centers for Disease Control and Prevention</source>
          <year>2023</year>
          <access-date>2023-05-17</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.cdc.gov/reproductivehealth/features/maternal-depression/index.html">https://www.cdc.gov/reproductivehealth/features/maternal-depression/index.html</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sharma</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>IW</given-names>
            </name>
            <name name-style="western">
              <surname>Miner</surname>
              <given-names>AS</given-names>
            </name>
            <name name-style="western">
              <surname>Atkins</surname>
              <given-names>DC</given-names>
            </name>
            <name name-style="western">
              <surname>Althoff</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Human–AI collaboration enables more empathic conversations in text-based peer-to-peer mental health support</article-title>
          <source>Nat Machine Intelligence</source>
          <year>2023</year>
          <month>01</month>
          <day>23</day>
          <volume>5</volume>
          <issue>1</issue>
          <fpage>46</fpage>
          <lpage>57</lpage>
          <pub-id pub-id-type="doi">10.1038/s42256-022-00593-2</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="web">
          <article-title>GPT-4</article-title>
          <source>OpenAI</source>
          <access-date>2023-04-25</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://openai.com/product/gpt-4">https://openai.com/product/gpt-4</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Collins</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Ghahramani</surname>
              <given-names>Z</given-names>
            </name>
          </person-group>
          <article-title>LaMDA: our breakthrough conversation technology</article-title>
          <source>The Keyword</source>
          <year>2021</year>
          <access-date>2023-09-06</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://blog.google/technology/ai/lamda/">https://blog.google/technology/ai/lamda/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="web">
          <article-title>Postpartum depression</article-title>
          <source>American College of Obstetricians and Gynecologists</source>
          <access-date>2023-05-15</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.acog.org/womens-health/faqs/postpartum-depression">https://www.acog.org/womens-health/faqs/postpartum-depression</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Guyatt</surname>
              <given-names>GH</given-names>
            </name>
            <name name-style="western">
              <surname>Oxman</surname>
              <given-names>AD</given-names>
            </name>
            <name name-style="western">
              <surname>Kunz</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Vist</surname>
              <given-names>GE</given-names>
            </name>
            <name name-style="western">
              <surname>Falck-Ytter</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Schünemann</surname>
              <given-names>HJ</given-names>
            </name>
            <collab>GRADE Working Group</collab>
          </person-group>
          <article-title>What is "quality of evidence" and why is it important to clinicians?</article-title>
          <source>BMJ</source>
          <year>2008</year>
          <month>05</month>
          <day>03</day>
          <volume>336</volume>
          <issue>7651</issue>
          <fpage>995</fpage>
          <lpage>998</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/18456631"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/bmj.39490.551019.BE</pub-id>
          <pub-id pub-id-type="medline">18456631</pub-id>
          <pub-id pub-id-type="pii">336/7651/995</pub-id>
          <pub-id pub-id-type="pmcid">PMC2364804</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ripley</surname>
              <given-names>BD</given-names>
            </name>
          </person-group>
          <article-title>The R Project in Statistical Computing</article-title>
          <source>MSOR Connections</source>
          <year>2001</year>
          <month>02</month>
          <volume>1</volume>
          <issue>1</issue>
          <fpage>23</fpage>
          <lpage>25</lpage>
          <pub-id pub-id-type="doi">10.11120/msor.2001.01010023</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Sezgin</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Bridge</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Clinical advice by voice assistants on postpartum depression: cross-sectional investigation using Apple Siri, Amazon Alexa, Google Assistant, and Microsoft Cortana</article-title>
          <source>JMIR Mhealth Uhealth</source>
          <year>2021</year>
          <month>01</month>
          <day>11</day>
          <volume>9</volume>
          <issue>1</issue>
          <fpage>e24045</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://mhealth.jmir.org/2021/1/e24045/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/24045</pub-id>
          <pub-id pub-id-type="medline">33427680</pub-id>
          <pub-id pub-id-type="pii">v9i1e24045</pub-id>
          <pub-id pub-id-type="pmcid">PMC7834933</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sarraju</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Bruemmer</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Van Iterson</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Cho</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Rodriguez</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Laffin</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Appropriateness of cardiovascular disease prevention recommendations obtained from a popular online chat-based artificial intelligence model</article-title>
          <source>JAMA</source>
          <year>2023</year>
          <month>03</month>
          <day>14</day>
          <volume>329</volume>
          <issue>10</issue>
          <fpage>842</fpage>
          <lpage>844</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/36735264"/>
          </comment>
          <pub-id pub-id-type="doi">10.1001/jama.2023.1044</pub-id>
          <pub-id pub-id-type="medline">36735264</pub-id>
          <pub-id pub-id-type="pii">2801244</pub-id>
          <pub-id pub-id-type="pmcid">PMC10015303</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Aronson</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Lieu</surname>
              <given-names>TW</given-names>
            </name>
            <name name-style="western">
              <surname>Scirica</surname>
              <given-names>BM</given-names>
            </name>
          </person-group>
          <article-title>Getting generative AI right</article-title>
          <source>NEJM Catalyst</source>
          <year>2023</year>
          <fpage>1</fpage>
          <pub-id pub-id-type="doi">10.1056/CAT.23.0063</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
