<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="letter" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMIR</journal-id>
      <journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id>
      <journal-title>Journal of Medical Internet Research</journal-title>
      <issn pub-type="epub">1438-8871</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v27i1e64993</article-id>
      <article-id pub-id-type="pmid">39869899</article-id>
      <article-id pub-id-type="doi">10.2196/64993</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Research Letter</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Research Letter</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Accuracy and Safety of AI-Enabled Scribe Technology: Instrument Validation Study</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Coristine</surname>
            <given-names>Andrew</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Sharma</surname>
            <given-names>Deepika</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Warg</surname>
            <given-names>Fredrik</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Biro</surname>
            <given-names>Joshua</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>National Center for Human Factors in Healthcare</institution>
            <institution>MedStar Health Research Institute</institution>
            <addr-line>3007 Tilden St NW</addr-line>
            <addr-line>Washington, DC, 20008</addr-line>
            <country>United States</country>
            <phone>1 3015423073</phone>
            <email>joshua.m.biro@medstar.net</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-7362-4138</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Handley</surname>
            <given-names>Jessica L</given-names>
          </name>
          <degrees>MA</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-1594-8171</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Cobb</surname>
            <given-names>Nathan K</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-4210-226X</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Kottamasu</surname>
            <given-names>Varsha</given-names>
          </name>
          <degrees>MHA</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0004-2749-5637</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Collins</surname>
            <given-names>Jeffrey</given-names>
          </name>
          <degrees>MHS</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0008-1623-1256</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author">
          <name name-style="western">
            <surname>Krevat</surname>
            <given-names>Seth</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-0167-8791</ext-link>
        </contrib>
        <contrib id="contrib7" contrib-type="author">
          <name name-style="western">
            <surname>Ratwani</surname>
            <given-names>Raj M</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-8623-6123</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>National Center for Human Factors in Healthcare</institution>
        <institution>MedStar Health Research Institute</institution>
        <addr-line>Washington, DC</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>MedStar Health Institute for Innovation</institution>
        <addr-line>Washington, DC</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Georgetown University School of Medicine</institution>
        <addr-line>Washington, DC</addr-line>
        <country>United States</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Joshua Biro <email>joshua.m.biro@medstar.net</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <year>2025</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>27</day>
        <month>1</month>
        <year>2025</year>
      </pub-date>
      <volume>27</volume>
      <elocation-id>e64993</elocation-id>
      <history>
        <date date-type="received">
          <day>1</day>
          <month>8</month>
          <year>2024</year>
        </date>
        <date date-type="rev-request">
          <day>24</day>
          <month>9</month>
          <year>2024</year>
        </date>
        <date date-type="rev-recd">
          <day>3</day>
          <month>10</month>
          <year>2024</year>
        </date>
        <date date-type="accepted">
          <day>9</day>
          <month>12</month>
          <year>2024</year>
        </date>
      </history>
      <copyright-statement>©Joshua Biro, Jessica L Handley, Nathan K Cobb, Varsha Kottamasu, Jeffrey Collins, Seth Krevat, Raj M Ratwani. Originally published in the Journal of Medical Internet Research (https://www.jmir.org), 27.01.2025.</copyright-statement>
      <copyright-year>2025</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research (ISSN 1438-8871), is properly cited. The complete bibliographic information, a link to the original publication on https://www.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://www.jmir.org/2025/1/e64993" xlink:type="simple"/>
      <abstract>
        <p>Artificial intelligence–enabled ambient digital scribes may have many potential benefits, yet results from our study indicate that there are errors that must be evaluated to mitigate safety risks.</p>
      </abstract>
      <kwd-group>
        <kwd>artificial intelligence</kwd>
        <kwd>AI</kwd>
        <kwd>patient safety</kwd>
        <kwd>ambient digital scribe</kwd>
        <kwd>AI-enabled scribe technology</kwd>
        <kwd>AI scribe technology</kwd>
        <kwd>scribe technology</kwd>
        <kwd>accuracy</kwd>
        <kwd>safety</kwd>
        <kwd>ambient scribe</kwd>
        <kwd>digital scribe</kwd>
        <kwd>patient-clinician</kwd>
        <kwd>patient-clinician communication</kwd>
        <kwd>doctor-patient relationship</kwd>
        <kwd>doctor-patient communication</kwd>
        <kwd>patient engagement</kwd>
        <kwd>dialogue script</kwd>
        <kwd>scribe</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>Generative artificial intelligence (AI)–enabled ambient digital scribe (ADS) technology uses the patient-clinician conversation to generate clinical documentation; it has the potential to improve patient engagement and reduce clinician burden [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref2">2</xref>]. These technologies are becoming more prevalent, especially in ambulatory care settings, yet there is little known about documentation accuracy and the types of errors that may stem from ADS use [<xref ref-type="bibr" rid="ref3">3</xref>]. Error-prone ADS technology may have serious patient safety consequences [<xref ref-type="bibr" rid="ref4">4</xref>]. We evaluated 2 popular commercially available ADS products in a simulated setting to systematically identify the frequency and pattern of documentation errors.</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Ethical Considerations</title>
        <p>This study was approved by the MedStar Health Institutional Review Board (00007789) to cover secondary analysis of existing patient data without additional consent. All data were deidentified. Participants did not receive any form of compensation.</p>
      </sec>
      <sec>
        <title>Recording and Simulation</title>
      <p>Recordings of 11 real outpatient encounters from a range of service lines (otolaryngology, cardiology, rheumatology, family medicine, pediatrics, endocrinology, internal medicine, gastroenterology, oncology, and urgent care) were transcribed by automated software and then deidentified and edited by a senior physician (NKC) for clarity to create 11 unique dialogue scripts. The dialogue scripts were used to evaluate 2 commercial ADS products. For each script, a researcher (JB or VK) simulating the patient and a medical resident simulating the physician read from the script while the ADS products were in use. Each script was read by 2 different residents per ADS product, yielding 22 draft notes per product and 44 draft notes in total across products. The residents reviewed the draft notes to identify errors. Each error was independently categorized by 2 reviewers (JB and JLH) as either an omission, addition, wrong output, or irrelevant or misplaced text, as defined in <xref ref-type="table" rid="table1">Table 1</xref>. Disagreements were discussed to reach consensus.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <p>There were 127 errors (mean 2.9, SD 2.7 errors per draft note) in 31 of 44 (70%) draft notes. ADS product A resulted in 66 errors (mean 3, SD 2.7 per draft note) and product B resulted in 61 errors (mean 2.8, SD 2.7 per draft note). Error frequency by error type and product is detailed in <xref ref-type="table" rid="table1">Table 1</xref>, with omission errors being the most frequent across products. Error types significantly differed between the 2 ADS products (Fisher exact test: <italic>P</italic>=.002).</p>
      <table-wrap position="float" id="table1">
        <label>Table 1</label>
        <caption>
          <p>Frequency counts, percentages, definitions, and examples of ambient digital scribe (ADS) error types.</p>
        </caption>
        <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
          <col width="130"/>
          <col width="130"/>
          <col width="130"/>
          <col width="300"/>
          <col width="310"/>
          <thead>
            <tr valign="top">
              <td>Error type</td>
              <td colspan="2">Errors by ADS product, n (%)</td>
              <td>Definition</td>
              <td>Example</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Product A (n=66)</td>
              <td>Product B (n=61)</td>
              <td>
                <break/>
              </td>
              <td>
                <break/>
              </td>
            </tr>
          </thead>
          <tbody>
            <tr valign="top">
              <td>Omission</td>
              <td>55 (83)</td>
              <td>33 (54)</td>
              <td>Model leaves out key information from its response</td>
              <td>“[N]o laterality mentioned in ears in physical exam section”</td>
            </tr>
            <tr valign="top">
              <td>Addition</td>
              <td>3 (5)</td>
              <td>7 (11)</td>
              <td>Model adds inappropriate or irrelevant information</td>
              <td>“Patient doesn’t refer to any flare ups in awhile, but the note shared that patient was using X medication to help with flare ups in HPI”</td>
            </tr>
            <tr valign="top">
              <td>Wrong output</td>
              <td>4 (6)</td>
              <td>6 (10)</td>
              <td>Model provides an incorrect response</td>
              <td>“[A]ssociated the wrong test with the contrast”</td>
            </tr>
            <tr valign="top">
              <td>Irrelevant or misplaced text</td>
              <td>4 (6)</td>
              <td>15 (25)</td>
              <td>Model output is technically correct but not appropriate in clinical context</td>
              <td>“[C]aptured all the supplemental information (asthma, mammogram, etc.) and harped on the steroid injections which doesn’t matter”</td>
            </tr>
          </tbody>
        </table>
      </table-wrap>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <p>While ADS technologies may have potential benefits, there are frequent errors in the generated note. Across both products, errors of omission were the most common; this error type may be the most difficult for clinicians to identify since the identification process requires memory recall of details from the patient encounter. If clinicians review their documentation after several patient encounters, recalling omitted details may be challenging. It may be easier to identify errors such as additions and wrong outputs since doing so relies on recognition of an issue in the text being presented to the clinician. Notably, there was a different pattern of errors between the 2 products.</p>
      <p>There are limitations to this study. The ADS technologies were evaluated against a limited number of patient cases in a controlled environment that did not fully represent clinical workflows. In addition, the cases were read by 2 researchers acting as patients, and it is likely that both clinicians and patients would have more variability in language, tone, volume, and many other characteristics that could impact ADS accuracy.</p>
      <p>It is imperative that ADS technologies be evaluated in a realistic clinical setting (either in situ or in a representative simulation) to determine the frequency and types of errors so that appropriate risk mitigation and safety plans can be developed. Developing methods to capture AI-related safety issues was a component of President Joe Biden’s “Executive Order on the Safe, Secure, and Trustworthy Development and Use of Artificial Intelligence” [<xref ref-type="bibr" rid="ref5">5</xref>], and robust processes for AI safety are needed. In the absence of a standardized evaluation framework, health care facilities currently bear the burden of testing and reporting these results in the United States. Notably, effective August 2024, the European Union Artificial Intelligence Act legally requires developers of AI-based systems to evaluate the safety of their products [<xref ref-type="bibr" rid="ref6">6</xref>]. While this is a step in the right direction, the underlying vendor algorithms are often proprietary, opaque, and the subject of continuous innovation; thus, there is still a need for independent ongoing testing to confirm vendor claims of safety. Future work should develop a robust, standardized, and repeatable ADS evaluation framework to facilitate efficient knowledge sharing in this fast-paced, decentralized system.</p>
    </sec>
  </body>
  <back>
    <app-group/>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">ADS</term>
          <def>
            <p>ambient digital scribe</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">AI</term>
          <def>
            <p>artificial intelligence</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>We would like to acknowledge Dr Sahithi Reddy and Dr James Mickler for their assistance in executing this work.</p>
    </ack>
    <notes>
      <sec>
        <title>Data Availability</title>
        <p>The data sets generated and/or analyzed during this study are available from the corresponding author on reasonable request.</p>
      </sec>
    </notes>
    <fn-group>
      <fn fn-type="con">
        <p>JB contributed to study design, data acquisition, data analysis, interpretation, drafting, and reviewing the manuscript. JLH contributed to study design, interpretation, drafting, and critically reviewing the manuscript. NKC contributed to study conception, design, data acquisition, interpretation, drafting, and critically reviewing the manuscript. JC contributed to study conception, data acquisition, and critically reviewing the manuscript. SK contributed to study design, data analysis, interpretation, and critically reviewing the manuscript. VK contributed to data acquisition, interpretation, and critically reviewing the manuscript. RMR contributed to study conception, study design, interpretation, and critically reviewing the manuscript.</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tierney</surname>
              <given-names>AA</given-names>
            </name>
            <name name-style="western">
              <surname>Gayre</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Hoberman</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Mattern</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Ballesca</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Kipnis</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Ambient artificial intelligence scribes to alleviate the burden of clinical documentation</article-title>
          <source>NEJM Catalyst</source>
          <year>2024</year>
          <month>02</month>
          <day>21</day>
          <volume>5</volume>
          <issue>3</issue>
          <fpage>692</fpage>
          <lpage>694</lpage>
          <pub-id pub-id-type="doi">10.1056/CAT.23.0404</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>van Buchem</surname>
              <given-names>MM</given-names>
            </name>
            <name name-style="western">
              <surname>Kant</surname>
              <given-names>IMJ</given-names>
            </name>
            <name name-style="western">
              <surname>King</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Kazmaier</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Steyerberg</surname>
              <given-names>EW</given-names>
            </name>
            <name name-style="western">
              <surname>Bauer</surname>
              <given-names>MP</given-names>
            </name>
          </person-group>
          <article-title>Impact of a digital scribe system on clinical documentation time and quality: usability study</article-title>
          <source>JMIR AI</source>
          <year>2024</year>
          <month>09</month>
          <day>23</day>
          <volume>3</volume>
          <fpage>e60020</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://ai.jmir.org/2024/1/e60020/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/60020</pub-id>
          <pub-id pub-id-type="medline">39312397</pub-id>
          <pub-id pub-id-type="pii">v3i1e60020</pub-id>
          <pub-id pub-id-type="pmcid">PMC11459111</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Seth</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Carretas</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Rudzicz</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>The utility and implications of ambient scribes in primary care</article-title>
          <source>JMIR AI</source>
          <year>2024</year>
          <month>10</month>
          <day>04</day>
          <volume>3</volume>
          <fpage>e57673</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://ai.jmir.org/2024/1/e57673/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/57673</pub-id>
          <pub-id pub-id-type="medline">39365655</pub-id>
          <pub-id pub-id-type="pii">v3i1e57673</pub-id>
          <pub-id pub-id-type="pmcid">PMC11489790</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>van Buchem</surname>
              <given-names>MM</given-names>
            </name>
            <name name-style="western">
              <surname>Boosman</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Bauer</surname>
              <given-names>MP</given-names>
            </name>
            <name name-style="western">
              <surname>Kant</surname>
              <given-names>IMJ</given-names>
            </name>
            <name name-style="western">
              <surname>Cammel</surname>
              <given-names>SA</given-names>
            </name>
            <name name-style="western">
              <surname>Steyerberg</surname>
              <given-names>EW</given-names>
            </name>
          </person-group>
          <article-title>The digital scribe in clinical practice: a scoping review and research agenda</article-title>
          <source>NPJ Digit Med</source>
          <year>2021</year>
          <month>03</month>
          <day>26</day>
          <volume>4</volume>
          <issue>1</issue>
          <fpage>57</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41746-021-00432-5"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41746-021-00432-5</pub-id>
          <pub-id pub-id-type="medline">33772070</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41746-021-00432-5</pub-id>
          <pub-id pub-id-type="pmcid">PMC7997964</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Biden</surname>
              <given-names>JR</given-names>
            </name>
          </person-group>
          <article-title>Executive order on the safe, secure, and trustworthy development and use of artificial intelligence</article-title>
          <source>The White House</source>
          <year>2023</year>
          <access-date>2025-01-07</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.whitehouse.gov/briefing-room/presidential-actions/2023/10/30/executive-order-on-the-safe-secure-and-trustworthy-development-and-use-of-artificial-intelligence/">https://www.whitehouse.gov/briefing-room/presidential-actions/2023/10/30/executive-order-on-the-safe-secure-and-trustworthy-development-and-use-of-artificial-intelligence/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="web">
          <article-title>Regulation (EU) 2024/1689 of the European Parliament and of the Council of 13 June 2024 laying down harmonised rules on artificial intelligence and amending Regulations (EC) No 300/2008, (EU) No 167/2013, (EU) No 168/2013, (EU) 2018/858, (EU) 2018/1139 and (EU) 2019/2144 and Directives 2014/90/EU, (EU) 2016/797 and (EU) 2020/1828. 2024/1689, 32024R1689</article-title>
          <source>European Union</source>
          <year>2024</year>
          <month>6</month>
          <day>13</day>
          <access-date>2024-09-27</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://artificialintelligenceact.eu/ai-act-explorer/">https://artificialintelligenceact.eu/ai-act-explorer/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
