<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="review-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id><journal-id journal-id-type="publisher-id">jmir</journal-id><journal-id journal-id-type="index">1</journal-id><journal-title>Journal of Medical Internet Research</journal-title><abbrev-journal-title>J Med Internet Res</abbrev-journal-title><issn pub-type="epub">1438-8871</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v28i1e82756</article-id><article-id pub-id-type="doi">10.2196/82756</article-id><article-categories><subj-group subj-group-type="heading"><subject>Review</subject></subj-group></article-categories><title-group><article-title>GenAI-Supported Virtual Patients in Health Care Education: Systematic Review</article-title></title-group><contrib-group><contrib contrib-type="author"><name name-style="western"><surname>Jiang</surname><given-names>Juming</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1"/></contrib><contrib contrib-type="author"><name name-style="western"><surname>Ye</surname><given-names>Megan Zichen</given-names></name><degrees>MSc</degrees><xref ref-type="aff" rid="aff1"/></contrib><contrib contrib-type="author"><name name-style="western"><surname>Kwok</surname><given-names>Tyrone Tai-On</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1"/></contrib><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Wong</surname><given-names>Janet Yuen Ha</given-names></name><degrees>PhD</degrees><xref ref-type="aff" 
rid="aff1"/></contrib></contrib-group><aff id="aff1"><institution>School of Nursing and Health Sciences, Jockey Club Institute of Healthcare, Hong Kong Metropolitan University</institution><addr-line>11th Floor, 1 Sheung Shing Street, Homantin, Kowloon</addr-line><addr-line>Hong Kong</addr-line><country>China (Hong Kong)</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Brini</surname><given-names>Stefano</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Cunha</surname><given-names>Daniel</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Funghetto</surname><given-names>Silvana</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Janet Yuen Ha Wong, PhD, School of Nursing and Health Sciences, Jockey Club Institute of Healthcare, Hong Kong Metropolitan University, 11th Floor, 1 Sheung Shing Street, Homantin, Kowloon, Hong Kong, China (Hong Kong), 852 39702988; <email>jyhwong@hkmu.edu.hk</email></corresp></author-notes><pub-date pub-type="collection"><year>2026</year></pub-date><pub-date pub-type="epub"><day>7</day><month>5</month><year>2026</year></pub-date><volume>28</volume><elocation-id>e82756</elocation-id><history><date date-type="received"><day>21</day><month>08</month><year>2025</year></date><date date-type="rev-recd"><day>27</day><month>03</month><year>2026</year></date><date date-type="accepted"><day>31</day><month>03</month><year>2026</year></date></history><copyright-statement>&#x00A9; Juming Jiang, Megan Zichen Ye, Tyrone Tai-On Kwok, Janet Yuen Ha Wong. Originally published in the Journal of Medical Internet Research (<ext-link ext-link-type="uri" xlink:href="https://www.jmir.org">https://www.jmir.org</ext-link>), 7.5.2026. 
</copyright-statement><copyright-year>2026</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research (ISSN 1438-8871), is properly cited. The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://www.jmir.org/">https://www.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://www.jmir.org/2026/1/e82756"/><abstract><sec><title>Background</title><p>Generative artificial intelligence (GenAI) is enhancing virtual patient simulations in health care education by enabling dynamic, adaptive interactions, reshaping how clinical skills are taught. A synthesis of the current evidence is needed to guide implementation and future research, given the pace of technological advancement.</p></sec><sec><title>Objective</title><p>This systematic review aims to synthesize empirical research on the design, implementation, and educational impact of GenAI-supported virtual patients in health care education.</p></sec><sec sec-type="methods"><title>Methods</title><p>A systematic search was conducted across 5 databases (CINAHL, Medline, Embase, Scopus, and Web of Science) from their inception to March 19, 2026. Reference lists of included studies and relevant systematic reviews were also screened. Peer-reviewed studies in English that evaluated GenAI-supported virtual patients using quantitative or mixed methods were included. 
Two reviewers independently screened studies and extracted data. Study quality and risk of bias were assessed critically using JBI (Joanna Briggs Institute) checklists, with disagreements resolved by consensus.</p></sec><sec sec-type="results"><title>Results</title><p>A total of 15 studies met the inclusion criteria (total participants N=645), spanning health care disciplines, including nursing, medicine, pharmacy, radiography, and medical first-responder training. The virtual patients varied in design; input modalities included text (9 studies), voice (5 studies), or hybrid (1 study); output was text (9 studies), speech (5 studies), or both (1 study); 6 studies used 3D-embodied avatars, while 9 used nonembodied interfaces. A total of 13 studies used OpenAI GPT models (eg, ChatGPT), 1 used a fine-tuned model from a different provider, and 1 evaluated multiple model families (Claude, GPT, and open-source). Further, 6 studies used controlled experimental designs, including 3 randomized controlled trials (RCTs); the remainder were cross-sectional or prepost evaluations. Primary outcomes included user perceptions (14 studies), communication skills (4 studies), clinical reasoning (3 studies), and performance (7 studies). In controlled comparisons, GenAI-supported virtual patients consistently improved outcomes relative to control conditions: for example, enhanced clinical decision-making (RCT, n=21), ophthalmology history-taking skills (RCT, n=26), and medical history-taking performance (crossover RCT, n=20). The evidence base is characterized by brief intervention durations, a predominant reliance on single-session interactions, and a general lack of underpinning educational theory. 
No meta-analysis was performed due to the limited number of studies and significant heterogeneity in designs, interventions, and outcome measures.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>The evidence supports the feasibility and acceptability of GenAI-supported virtual patients, with positive learner perceptions and promising outcomes for skills development. However, critical limitations remain in emotional-behavioral complexity, simulation adaptability, and research design rigor (eg, limited use of control groups and validated instruments). The review offers educators, instructional designers, and policymakers actionable insights for integrating dynamic, artificial intelligence&#x2013;driven simulations while identifying crucial gaps&#x2014;such as the need for theoretical grounding, longitudinal studies, and standardized design protocols&#x2014;that must be addressed for safe and effective implementation.</p></sec><sec><title>Trial Registration</title><p>Open Science Framework (OSF) q8b5n; https://osf.io/q8b5n/files/mysz3</p></sec></abstract><kwd-group><kwd>systematic review</kwd><kwd>generative AI</kwd><kwd>virtual patient</kwd><kwd>health care education</kwd><kwd>PRISMA</kwd><kwd>generative artificial intelligence</kwd><kwd>Preferred Reporting Items for Systematic Reviews and Meta-Analyses</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><sec id="s1-1"><title>Background</title><p>Virtual patients are sophisticated educational simulations designed to replicate authentic clinical scenarios, enabling health care trainees to practice skills in a safe, controlled environment without risk to real patients [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref2">2</xref>]. 
They are defined as &#x201C;a representation of an actual patient,&#x201D; which can include various forms such as software-based simulators or manikins, and specifically as &#x201C;a computer program that simulates real-life clinical scenarios in which the learner acts as a health care provider,&#x201D; making clinical decisions [<xref ref-type="bibr" rid="ref3">3</xref>]. They serve as a cornerstone of modern clinical education, aiming to enhance clinical reasoning, communication proficiency, and decision-making abilities [<xref ref-type="bibr" rid="ref4">4</xref>]. The recent and rapid integration of generative artificial intelligence (GenAI)&#x2014;a subset of artificial intelligence (AI) powered by large language models (LLMs) and natural language processing&#x2014;is fundamentally transforming this educational tool [<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref6">6</xref>]. Unlike traditional scripted simulations, GenAI-supported virtual patients can generate dynamic, adaptive, and contextually relevant responses in real-time, allowing for more realistic conversations, emotional responsiveness, and personalized learning experiences [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref8">8</xref>]. This shift marks a significant evolution in simulation-based learning, moving from static, preprogrammed cases toward interactive, intelligent patient encounters.</p></sec><sec id="s1-2"><title>Rationale</title><p>The evolution of virtual patients demonstrates a clear trajectory toward greater interactivity and realism. From early, static computer-based cases on systems such as PLATO [<xref ref-type="bibr" rid="ref9">9</xref>], modern iterations now incorporate multimedia, branching narratives, and data-driven models to create more engaging and realistic clinical encounters [<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref11">11</xref>]. 
This evolution has yielded significant educational benefits. Empirical studies show that virtual patients can improve clinical decision-making, diagnostic accuracy, and foundational skills such as screening and referral across various health disciplines [<xref ref-type="bibr" rid="ref11">11</xref>,<xref ref-type="bibr" rid="ref12">12</xref>]. For instance, they have been successfully implemented as standardized, unfolding simulations to replace scarce pediatric clinical hours while maintaining clinical competency in nursing education [<xref ref-type="bibr" rid="ref13">13</xref>]. Furthermore, a pilot study grounded in Experiential Learning Theory demonstrated that virtual patient simulation led to statistically significant improvements in clinical reasoning and communication skills among prelicensure nursing students [<xref ref-type="bibr" rid="ref14">14</xref>]. They provide a scalable, consistent training environment that addresses limitations inherent to human standardized patients, such as fatigue and variability [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref15">15</xref>].</p><p>Despite these advances, a critical constraint remains: the predominant reliance on prescripted, linear scenarios. This often results in predictable interactions that fail to fully replicate the dynamic, adaptive, and complex nature of real patient encounters, potentially limiting learner engagement and the ability to respond to individualized student input [<xref ref-type="bibr" rid="ref16">16</xref>,<xref ref-type="bibr" rid="ref17">17</xref>]. Consequently, there is a recognized need for more dynamic and adaptable virtual patient models to better prepare learners for clinical practice [<xref ref-type="bibr" rid="ref18">18</xref>].</p><p>GenAI presents a transformative solution to this limitation. 
A subset of AI powered by LLMs, GenAI can generate context-aware, realistic text, dialog, and even simulate human behaviors in real-time [<xref ref-type="bibr" rid="ref19">19</xref>,<xref ref-type="bibr" rid="ref20">20</xref>]. In health care education, integrating GenAI allows virtual patients to move beyond scripts, generating unique, adaptive responses based on learner inquiries and simulating a wider range of patient behaviors and emotional states [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref8">8</xref>]. This capability promises to significantly enhance the authenticity, personalization, and educational value of simulation-based training.</p><p>However, the implementation of this novel technology introduces new complexities regarding its design, pedagogical integration, and evaluation. While systematic reviews have established the effectiveness of traditional, scripted virtual patients [<xref ref-type="bibr" rid="ref21">21</xref>,<xref ref-type="bibr" rid="ref22">22</xref>] and scoped the broad potential of GenAI in education [<xref ref-type="bibr" rid="ref5">5</xref>], a critical gap remains. Existing syntheses are unequipped to address the unique research questions (RQs) generated by their convergence. Specifically, the literature lacks evidence-based guidance on how the adaptive, nondeterministic nature of GenAI alters optimal instructional design, what novel evaluation frameworks are required to measure its impact on dynamic clinical reasoning, and how the practical challenges of implementation differ from those of static simulations. This dedicated synthesis is absent. 
Consequently, without a focused systematic review, evidence regarding the optimal design features, verifiable educational impact, and practical challenges of these advanced tools remains fragmented, hindering evidence-based adoption and focused research development [<xref ref-type="bibr" rid="ref23">23</xref>,<xref ref-type="bibr" rid="ref24">24</xref>].</p><p>Given these factors, it is essential to conduct a systematic review that not only summarizes the characteristics of GenAI-supported virtual patients but also provides comprehensive guidance for future research in this emerging field. Such a review will help educators, researchers, and policymakers understand the potential benefits and limitations of GenAI-supported virtual patients, thereby facilitating their integration into health care education curricula.</p></sec><sec id="s1-3"><title>Objectives</title><p>The primary objective of this systematic review is to synthesize the current empirical evidence on the design, implementation, and educational impact of GenAI-supported virtual patients in health care education. Specifically, the review addresses the following RQs:</p><p>RQ1. What are the design choices, technological architecture, and educational strategies embedded within GenAI-supported virtual patients?</p><p>RQ2. 
What are the evaluation and educational impact, including benefits, outcomes, and related limitations of GenAI-supported virtual patients?</p></sec></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Study Design</title><p>This review was conducted and reported in accordance with the PRISMA (Preferred Reporting Items for Systematic Reviews and Meta-Analyses) 2020 statement and its expanded checklist (<xref ref-type="supplementary-material" rid="app3">Checklist 1</xref>) [<xref ref-type="bibr" rid="ref25">25</xref>], as well as synthesis without meta-analysis (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>) [<xref ref-type="bibr" rid="ref26">26</xref>]. The protocol was registered and published in the Open Science Framework repository [<xref ref-type="bibr" rid="ref27">27</xref>].</p></sec><sec id="s2-2"><title>Eligibility Criteria</title><p>The following inclusion criteria were used: (1) original studies published in peer-reviewed journals, (2) focused on evaluating how GenAI-supported virtual patients affect education and training in health care&#x2013;related disciplines, (3) included measurements of user experience or user outcomes, (4) conducted using quantitative or mixed methods research design, and (5) written in English. The exclusion criteria were the following: (1) book chapters, editorials, short communications, letters, and review literature; and (2) studies focused solely on the technical development of virtual patients without educational evaluation.</p></sec><sec id="s2-3"><title>Information Sources</title><p>A systematic search was performed across 5 databases from their inception to March 19, 2026: CINAHL (via EBSCOhost), MEDLINE (via EBSCOhost), Embase (via Elsevier), Scopus (via Elsevier), and Web of Science Core Collection (via Clarivate). These databases were chosen for their comprehensive coverage of biomedical, nursing, allied health, and interdisciplinary literature. 
Furthermore, the reference lists of all included full-text papers were manually reviewed to identify additional relevant studies.</p></sec><sec id="s2-4"><title>Search Strategy</title><p>The search strategy was developed iteratively for each database to ensure comprehensive coverage of the core concepts of &#x201C;virtual patient&#x201D; and &#x201C;generative artificial intelligence.&#x201D; For each database, the strategy combined controlled vocabulary (where available, eg, MeSH [Medical Subject Headings] in MEDLINE and CINAHL Subject Headings) with extensive free-text terms, using appropriate field-specific syntax (eg, MH and XB in CINAHL and MEDLINE, :ti,ab in Embase, TITLE-ABS-KEY in Scopus, and TS= in Web of Science). Synonyms for virtual patients and GenAI models were grouped logically, and all free-text terms were searched in the title, abstract, and keyword fields. Searches were executed using the advanced search interface of each platform. The search was limited to records published in English and, where available, to peer-reviewed papers. The search was updated and rerun on March 19, 2026, after incorporating expanded terms and controlled vocabulary as suggested during peer review. 
The full search strategy is reported in <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref> and in accordance with the PRISMA-S (Preferred Reporting Items for Systematic Reviews and Meta-Analyses Literature Search Extension) checklist [<xref ref-type="bibr" rid="ref28">28</xref>].</p><p>Several elements of the PRISMA-S checklist [<xref ref-type="bibr" rid="ref28">28</xref>] did not apply to our methodology: we did not search databases simultaneously on a single platform (item 2); we did not search study registries (item 3); we did not browse online or print sources (item 4); we did not perform citation searching beyond checking reference lists of included studies (item 5); we did not contact authors, experts, or manufacturers to identify additional studies (item 6); we did not use any other information sources or methods beyond those described (item 7); we did not use published search filters (item 10); we did not adapt search strategies from prior reviews (item 11); we did not set up email alerts or automated updates, but we did manually rerun the search after refining the strategy (item 12); and the search was not formally peer reviewed (item 14). These items are explicitly noted as not applicable in this paper.</p></sec><sec id="s2-5"><title>Selection Process</title><p>All records identified from the database searches were imported into Zotero reference management software for deduplication. The selection process was conducted in 2 phases. First, 2 reviewers (JJ and MZY) independently screened the titles and abstracts of all records against the eligibility criteria. Second, the full texts of potentially eligible studies were retrieved and independently assessed for inclusion by the same 2 reviewers. Any disagreements at either stage were resolved through discussion between the reviewers until consensus was reached. 
The interrater reliability was calculated using Cohen &#x03BA;, resulting in &#x03BA;=0.7, which indicates substantial agreement [<xref ref-type="bibr" rid="ref29">29</xref>].</p></sec><sec id="s2-6"><title>Data Collection Process</title><p>A standardized data extraction form was developed in Google Sheets (Google LLC). Further, 2 reviewers (JJ and MZY) independently extracted data from each included study. The extracted data were then cross-checked, and any discrepancies were resolved through discussion. The interrater reliability was &#x03BA;=0.6, which indicates moderate agreement [<xref ref-type="bibr" rid="ref29">29</xref>].</p></sec><sec id="s2-7"><title>Data Items</title><p>Google Sheets (Google LLC) was used for data extraction. The following information was extracted from each of the papers that met the inclusion criteria: (1) publication characteristics: authors, publication year, and source; (2) study characteristics: study design and sample size; (3) intervention characteristics: description of the GenAI-supported virtual patient, including input or output modalities, use of an avatar, duration of interaction, technological details (eg, GenAI model and prompt engineering), and integration with educational strategies or theories; (4) outcomes: all reported outcome measures, including primary and secondary outcomes related to user perceptions (eg, usability, satisfaction, and perceived learning), skills (eg, communication and clinical reasoning), and performance; and (5) key results: main quantitative and qualitative findings as reported by the authors of each included study.</p></sec><sec id="s2-8"><title>Study Risk of Bias Assessment</title><p>To critically appraise the methodological quality and risk of bias of the included studies, a formal assessment was conducted in accordance with PRISMA guidelines (item 11). 
The JBI (Joanna Briggs Institute) critical appraisal checklists, appropriate to each study design, were used as standardized tools [<xref ref-type="bibr" rid="ref30">30</xref>]. Specifically, the JBI Checklist for Randomized Controlled Trials was used for randomized controlled trials (RCTs) [<xref ref-type="bibr" rid="ref31">31</xref>], the JBI Checklist for Quasi-Experimental Studies was used for nonrandomized comparative studies [<xref ref-type="bibr" rid="ref32">32</xref>], and the JBI Checklist for Analytical Cross-Sectional Studies was used for cross-sectional evaluations [<xref ref-type="bibr" rid="ref32">32</xref>]. Further, 2 reviewers independently assessed each study. Any discrepancies in appraisal judgments were resolved through discussion to reach consensus. The interrater reliability was &#x03BA;=0.8, which indicates substantial agreement [<xref ref-type="bibr" rid="ref29">29</xref>]. The results of this assessment are synthesized narratively and were considered when interpreting the overall strength and validity of the evidence presented in this review.</p></sec><sec id="s2-9"><title>Effect Measures</title><p>The current review synthesized findings narratively; thus, no common effect measures were pooled across studies due to significant heterogeneity in study designs, interventions, and outcome measures.</p></sec><sec id="s2-10"><title>Synthesis Methods</title><p>A meta-analysis was not feasible due to the limited number of studies and substantial heterogeneity in interventions, populations, and outcome measures. Therefore, a narrative synthesis was conducted. The extracted data were summarized and organized to address the review&#x2019;s RQs. Findings were structured thematically to describe: (1) the implementation characteristics (design, technology, and educational strategies) of GenAI-supported virtual patients, and (2) their evaluation and educational impact (benefits, outcomes, methodological approaches, and limitations). 
The synthesis also integrates a discussion of the methodological quality and risk of bias of the included studies.</p></sec><sec id="s2-11"><title>Reporting Bias Assessment</title><p>No formal statistical assessment of publication bias (eg, funnel plot) was performed due to the narrative synthesis approach and the small number of included studies.</p></sec><sec id="s2-12"><title>Certainty Assessment</title><p>A formal assessment of the certainty of the body of evidence (eg, GRADE [Grading of Recommendations, Assessment, Development, and Evaluation]) was not conducted for this narrative review, as its primary aim was to map and characterize an emerging field rather than to estimate a pooled treatment effect.</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title>Study Selection</title><p>The initial literature search yielded 2860 studies. After screening the abstracts, 107 papers were selected for further evaluation. Further, 2 authors screened full texts independently, and the screening was then discussed as a group to ensure consensus and make the final selection decision. Ultimately, 15 papers were included in the final analysis after reading the full-text. <xref ref-type="fig" rid="figure1">Figure 1</xref> provides a detailed overview of the systematic search procedure.</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>Summary of the selection of publications. GenAI: generative artificial intelligence.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="jmir_v28i1e82756_fig01.png"/></fig></sec><sec id="s3-2"><title>Study Characteristics</title><p>All 15 studies included in this review were published in peer-reviewed journals. 
Research in this area was prominently featured in the JMIR portfolio, which published 6 of the included papers: 3 papers in <italic>JMIR Medical Education</italic> [<xref ref-type="bibr" rid="ref33">33</xref>-<xref ref-type="bibr" rid="ref35">35</xref>], 2 papers in the <italic>Journal of Medical Internet Research</italic> [<xref ref-type="bibr" rid="ref36">36</xref>,<xref ref-type="bibr" rid="ref37">37</xref>], and 1 paper in <italic>JMIR Formative Research</italic> [<xref ref-type="bibr" rid="ref38">38</xref>]. Temporally, 8 studies were published in 2024 [<xref ref-type="bibr" rid="ref33">33</xref>-<xref ref-type="bibr" rid="ref35">35</xref>,<xref ref-type="bibr" rid="ref38">38</xref>-<xref ref-type="bibr" rid="ref42">42</xref>], marking a peak of initial investigative activity, followed by 7 more studies in 2025 and 2026 [<xref ref-type="bibr" rid="ref36">36</xref>,<xref ref-type="bibr" rid="ref37">37</xref>,<xref ref-type="bibr" rid="ref43">43</xref>-<xref ref-type="bibr" rid="ref47">47</xref>], indicating sustained scholarly interest. The included studies used a diverse range of methodologies. The 2024 cohort predominantly featured cross-sectional (n=5) [<xref ref-type="bibr" rid="ref33">33</xref>,<xref ref-type="bibr" rid="ref34">34</xref>,<xref ref-type="bibr" rid="ref38">38</xref>,<xref ref-type="bibr" rid="ref39">39</xref>,<xref ref-type="bibr" rid="ref42">42</xref>] and quasi-experimental (n=2) [<xref ref-type="bibr" rid="ref35">35</xref>,<xref ref-type="bibr" rid="ref40">40</xref>] designs, alongside 1 RCT [<xref ref-type="bibr" rid="ref41">41</xref>]. 
The 2025&#x2010;2026 publications demonstrated a continued diversification of methods, including 2 additional RCTs [<xref ref-type="bibr" rid="ref45">45</xref>,<xref ref-type="bibr" rid="ref47">47</xref>], 4 quasi-experimental studies [<xref ref-type="bibr" rid="ref36">36</xref>,<xref ref-type="bibr" rid="ref43">43</xref>,<xref ref-type="bibr" rid="ref44">44</xref>,<xref ref-type="bibr" rid="ref46">46</xref>], and 1 proof-of-concept observational study [<xref ref-type="bibr" rid="ref37">37</xref>]. Participant sample sizes varied widely, from smaller feasibility studies (eg, n=6 [<xref ref-type="bibr" rid="ref40">40</xref>]) to larger-scale evaluations (eg, n=145 [<xref ref-type="bibr" rid="ref35">35</xref>,<xref ref-type="bibr" rid="ref43">43</xref>]), and a paired crossover study with 20 medical students [<xref ref-type="bibr" rid="ref47">47</xref>]. The duration and frequency of interventions with the GenAI-powered virtual patients also differed, ranging from brief, single interactions of approximately 6&#x2010;10 minutes [<xref ref-type="bibr" rid="ref38">38</xref>] to more extended or repeated practice sessions over several weeks [<xref ref-type="bibr" rid="ref43">43</xref>]. This heterogeneity in publication venues, design, scale, and intervention format reflects the exploratory and rapidly evolving nature of research in this domain.</p><p>Moreover, among the 15 included studies, 13 of them explicitly stated obtaining ethics approval from an institutional review board and described informed consent procedures [<xref ref-type="bibr" rid="ref33">33</xref>-<xref ref-type="bibr" rid="ref39">39</xref>,<xref ref-type="bibr" rid="ref41">41</xref>,<xref ref-type="bibr" rid="ref43">43</xref>-<xref ref-type="bibr" rid="ref45">45</xref>]. 
Specific data privacy or security measures were reported less consistently, detailed in only 8 (53%) studies [<xref ref-type="bibr" rid="ref33">33</xref>-<xref ref-type="bibr" rid="ref35">35</xref>,<xref ref-type="bibr" rid="ref37">37</xref>-<xref ref-type="bibr" rid="ref39">39</xref>,<xref ref-type="bibr" rid="ref43">43</xref>,<xref ref-type="bibr" rid="ref46">46</xref>]. Notably, 2 studies [<xref ref-type="bibr" rid="ref40">40</xref>,<xref ref-type="bibr" rid="ref42">42</xref>] documented that formal ethics approval was not required for their projects, citing their design as a quality assurance activity and a survey of professionals, respectively.</p></sec><sec id="s3-3"><title>Risk of Bias in Included Studies</title><p>The methodological quality of the 15 included studies, as assessed by the JBI critical appraisal checklists, demonstrated a spectrum of risk of bias (<xref ref-type="table" rid="table1">Table 1</xref>). Further, 3 RCTs were judged to have a low to low-moderate risk of bias [<xref ref-type="bibr" rid="ref41">41</xref>,<xref ref-type="bibr" rid="ref45">45</xref>,<xref ref-type="bibr" rid="ref47">47</xref>], benefiting from strong methodologies including randomization, allocation concealment, and blinded outcome assessment. However, the nature of the interventions precluded participant blinding, a common limitation in educational technology studies. In contrast, the body of quasi-experimental [<xref ref-type="bibr" rid="ref35">35</xref>,<xref ref-type="bibr" rid="ref36">36</xref>,<xref ref-type="bibr" rid="ref40">40</xref>,<xref ref-type="bibr" rid="ref43">43</xref>,<xref ref-type="bibr" rid="ref44">44</xref>,<xref ref-type="bibr" rid="ref46">46</xref>] and cross-sectional studies [<xref ref-type="bibr" rid="ref33">33</xref>,<xref ref-type="bibr" rid="ref34">34</xref>,<xref ref-type="bibr" rid="ref37">37</xref>-<xref ref-type="bibr" rid="ref39">39</xref>,<xref ref-type="bibr" rid="ref42">42</xref>] presented a higher risk of bias. 
Common methodological weaknesses across these designs included the use of small, nonrepresentative samples, a lack of control groups, the absence of preintervention or longitudinal outcome measurements, and frequent reliance on unvalidated or self-reported outcome measures. Consequently, while the evidence from RCTs provides a stronger foundation for causal inference regarding the impact of GenAI-supported virtual patients, the overall findings of this review&#x2014;particularly those related to user perceptions and feasibility&#x2014;are drawn from a predominantly moderate-risk evidence base. This necessitates a cautious interpretation of the results, as positive outcomes may be influenced by study design limitations and enthusiasm for a novel technology.</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Risk of bias assessment of included studies.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Study type and reference citation</td><td align="left" valign="bottom">Overall risk of bias or appraisal judgment</td><td align="left" valign="bottom">Key concerns or limitations</td></tr></thead><tbody><tr><td align="left" valign="top" colspan="3">RCTs<sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup></td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>[<xref ref-type="bibr" rid="ref41">41</xref>]</td><td align="left" valign="top">Low to moderate risk</td><td align="left" valign="top">Unclear allocation concealment; potential lack of blinding for treatment deliverers.</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>[<xref ref-type="bibr" rid="ref45">45</xref>]</td><td align="left" valign="top">Low risk</td><td align="left" valign="top">Lack of participant blinding (acknowledged as an expected limitation for the intervention 
type).</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>[<xref ref-type="bibr" rid="ref47">47</xref>]</td><td align="left" valign="top">Low risk</td><td align="left" valign="top">Well-designed crossover RCT; lack of participant blinding is an inherent limitation.</td></tr><tr><td align="left" valign="top" colspan="3">Quasi-experimental studies</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>[<xref ref-type="bibr" rid="ref40">40</xref>]</td><td align="left" valign="top">Moderate to high risk</td><td align="left" valign="top">Intentional selection of dissimilar participants; no control group; no preintervention measurement; unclear reliability of qualitative coding.</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>[<xref ref-type="bibr" rid="ref35">35</xref>]</td><td align="left" valign="top">Moderate risk</td><td align="left" valign="top">Nonrandomized historical control group; no pretest for primary outcome; intervention was supplementary to the standard curriculum.</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>[<xref ref-type="bibr" rid="ref36">36</xref>]</td><td align="left" valign="top">Moderate risk</td><td align="left" valign="top">Lack of a preintervention baseline measure for the primary outcome.</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>[<xref ref-type="bibr" rid="ref43">43</xref>]</td><td align="left" valign="top">Moderate risk</td><td align="left" valign="top">No control group; no pretest measurement; reliance on self-reported data; unclear reliability of survey instrument.</td></tr><tr><td align="left" valign="top"><named-content 
content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>[<xref ref-type="bibr" rid="ref44">44</xref>]</td><td align="left" valign="top">Moderate risk</td><td align="left" valign="top">No control group; only partial prepost measurement for some outcomes; small convenience sample.</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>[<xref ref-type="bibr" rid="ref46">46</xref>]</td><td align="left" valign="top">Moderate risk</td><td align="left" valign="top">Between-groups design with nonrandomized assignment; small sample size; single-site study.</td></tr><tr><td align="left" valign="top" colspan="3">Analytical cross-sectional studies</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>[<xref ref-type="bibr" rid="ref39">39</xref>]</td><td align="left" valign="top">High risk</td><td align="left" valign="top">Very small sample; lack of validated measures; no control for confounders; limited generalizability.</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>[<xref ref-type="bibr" rid="ref33">33</xref>]</td><td align="left" valign="top">Moderate risk</td><td align="left" valign="top">Lack of confounding factor consideration; use of a single case scenario.</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>[<xref ref-type="bibr" rid="ref34">34</xref>]</td><td align="left" valign="top">Moderate risk</td><td align="left" valign="top">Lack of consideration for confounding factors; single case or model; potential selection bias.</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>[<xref ref-type="bibr" rid="ref42">42</xref>]</td><td align="left" valign="top">Moderate risk</td><td 
align="left" valign="top">Small sample; lack of objective outcome measures; no control group; potential for selection or response bias.</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>[<xref ref-type="bibr" rid="ref38">38</xref>]</td><td align="left" valign="top">Moderate risk</td><td align="left" valign="top">Subjective outcome measures; no objective performance assessment; no control group; small sample.</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>[<xref ref-type="bibr" rid="ref37">37</xref>]</td><td align="left" valign="top">Moderate risk</td><td align="left" valign="top">No control group; small single-site sample; reliance on self-reported benefits; no long-term skill retention measures.</td></tr></tbody></table><table-wrap-foot><fn id="table1fn1"><p><sup>a</sup>RCT: randomized controlled trial.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s3-4"><title>Results of Syntheses</title><p>To address the RQ1 concerning the design and implementation characteristics of GenAI-supported virtual patients, the following sections analyze the extracted data, which are comprehensively tabulated in <xref ref-type="table" rid="table2">Table 2</xref>.</p><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Characteristics of the virtual patients and the GenAI<sup><xref ref-type="table-fn" rid="table2fn1">a</xref></sup> models used in the 15 included studies. 
Detailing input or output modalities, avatar design, the AI&#x2019;s<sup><xref ref-type="table-fn" rid="table2fn2">b</xref></sup> tasked role, and using educational frameworks.</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Reference citation</td><td align="left" valign="bottom">Input (eg, text and voice)</td><td align="left" valign="bottom">Output (eg, text and voice)</td><td align="left" valign="bottom">Avatar (2D/3D)</td><td align="left" valign="bottom">Movement</td><td align="left" valign="bottom">Emotion expression</td><td align="left" valign="bottom">Type or name of AI</td><td align="left" valign="bottom">Overview of the task for GenAI based on the designated prompt</td><td align="left" valign="bottom">Educational theories or models</td></tr></thead><tbody><tr><td align="left" valign="top">[<xref ref-type="bibr" rid="ref39">39</xref>]</td><td align="left" valign="top">Text</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Text</p></list-item></list></td><td align="left" valign="top">N/A<sup><xref ref-type="table-fn" rid="table2fn3">c</xref></sup></td><td align="left" valign="top">N/A</td><td align="left" valign="top">N/A</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>ChatGPT (version not specified)</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Simulate a realistic clinical interaction focusing on assessment, communication, and nursing care for respiratory distress</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>N/A</p></list-item></list></td></tr><tr><td align="left" valign="top">[<xref ref-type="bibr" rid="ref40">40</xref>]</td><td align="left" valign="top">Text</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Text</p></list-item></list></td><td align="left" valign="top">N/A</td><td align="left" valign="top">N/A</td><td align="left" 
valign="top">Using case scenarios such as patients with claustrophobia undergoing an MRI<sup><xref ref-type="table-fn" rid="table2fn4">d</xref></sup> scan</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>OpenAI ChatGPT 3.5 and ChatGPT 4</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>A roleplay designed to simulate a conversation between a radiology technician and a patient with claustrophobia during an MRI examination. The AI takes the role of the patient, while the user plays the technician.</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>N/A</p></list-item></list></td></tr><tr><td align="left" valign="top">[<xref ref-type="bibr" rid="ref33">33</xref>]</td><td align="left" valign="top">Text</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Text</p></list-item></list></td><td align="left" valign="top">N/A</td><td align="left" valign="top">N/A</td><td align="left" valign="top">N/A</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>OpenAI GPT-4</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Two prompts were developed: one for providing the interactive history-taking dialog, and the other for giving feedback</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>N/A</p></list-item></list></td></tr><tr><td align="left" valign="top">[<xref ref-type="bibr" rid="ref34">34</xref>]</td><td align="left" valign="top">Text</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Text</p></list-item></list></td><td align="left" valign="top">N/A</td><td align="left" valign="top">N/A</td><td align="left" valign="top">N/A</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>OpenAI GPT-3.5</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>GPT acts as 
a simulated patient. The prompts were designed to guide GPT&#x2019;s behavior and ensure it provided medically accurate and relevant responses.</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>N/A</p></list-item></list></td></tr><tr><td align="left" valign="top">[<xref ref-type="bibr" rid="ref35">35</xref>]</td><td align="left" valign="top">Text</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Text</p></list-item></list></td><td align="left" valign="top">N/A</td><td align="left" valign="top">N/A</td><td align="left" valign="top">The emotional parameters were set from 1 to 10 for 8 emotions: joy, sadness, anticipation, surprise, fear, disgust, trust, and anger</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>GPT-4 Turbo</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>The prompt is designed to simulate a chatbot role-playing as a medical patient with dynamic emotional behavior. 
It consists of two major phases: (1) roleplay phase (simulated patient behavior): governs how the chatbot behaves during the medical consultation, and (2) feedback phase (interaction evaluation): after the roleplay ends, the chatbot switches to feedback mode and evaluates the user&#x2019;s performance.</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>N/A</p></list-item></list></td></tr><tr><td align="left" valign="top">[<xref ref-type="bibr" rid="ref41">41</xref>]</td><td align="left" valign="top">Text</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Text</p></list-item></list></td><td align="left" valign="top">N/A</td><td align="left" valign="top">N/A</td><td align="left" valign="top">N/A</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>OpenAI GPT-3.5</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Control group (AI simulation only): a virtual patient scenario crafted for emergency and neurological assessment training. 
The AI simulates a patient experiencing a traumatic brain injury.</p></list-item><list-item><p>Feedback group (AI simulation+ AI feedback): the AI first simulates the patient with the same setting in the control group and then provides diagnostic feedback assessment for participants who play as the doctor after their interaction.</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>N/A</p></list-item></list></td></tr><tr><td align="left" valign="top">[<xref ref-type="bibr" rid="ref42">42</xref>]</td><td align="left" valign="top">Text, voice</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Text, voice</p></list-item></list></td><td align="left" valign="top">3D</td><td align="left" valign="top">N/A</td><td align="left" valign="top">N/A</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Generative conversational AI, specifically using the platform Convai (Convai Technologies Inc) and incorporating ChatGPT (version was not mentioned) for text generation</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Provide comprehensive content on ORIF<sup><xref ref-type="table-fn" rid="table2fn5">e</xref></sup> surgery suitable for training a large language model, which is then subsequently further expanded.</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>N/A</p></list-item></list></td></tr><tr><td align="left" valign="top">[<xref ref-type="bibr" rid="ref38">38</xref>]</td><td align="left" valign="top">Voice</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Voice</p></list-item></list></td><td align="left" valign="top">3D</td><td align="left" valign="top">Vive Trackers (version 3.0) were placed on the head, hands, feet, and groin of the manikin and mapped to the corresponding parts of the VP&#x2019;s<sup><xref ref-type="table-fn" rid="table2fn6">f</xref></sup> avatar. 
This allowed the MFRs<sup><xref ref-type="table-fn" rid="table2fn7">g</xref></sup> to freely move the manikin and thus the VP.</td><td align="left" valign="top">Emotion is expressed through the voice, including stuttering, groans, and cries, as well as statements reflecting fear and pain</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>OpenAI GPT-3.5-Turbo</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>This simulation helps practice trauma-informed care, nonverbal cues, and managing patients in acute distress. All responses are limited to a maximum of 8 stuttered words.</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>N/A</p></list-item></list></td></tr><tr><td align="left" valign="top">[<xref ref-type="bibr" rid="ref36">36</xref>]</td><td align="left" valign="top">Voice (student&#x2019;s spoken questions, converted to text via speech-to-text)</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Voice (synthesized speech via text-to-speech)</p></list-item><list-item><p>Visual (facial expressions projected onto the robot)</p></list-item></list></td><td align="left" valign="top">3D (Furhat social robot with animated face back-projected onto a translucent mask)</td><td align="left" valign="top">Natural head movements (neck with 3 degrees of freedom)</td><td align="left" valign="top">Facial expressions (eg, sad, happy, or surprised) were generated and synchronized with speech</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>OpenAI GPT-3.5-turbo</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Dialog generation: to generate the next patient dialog line. 
The prompt includes the patient case description, the last 10 dialog turns, and instructions to respond as the patient.</p></list-item><list-item><p>Expression generation: to select appropriate facial expressions (from a predefined set) at anchor points within the generated dialog text to reflect the patient&#x2019;s emotional state.</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>N/A</p></list-item></list></td></tr><tr><td align="left" valign="top">[<xref ref-type="bibr" rid="ref43">43</xref>]</td><td align="left" valign="top">Text</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Text</p></list-item></list></td><td align="left" valign="top">N/A</td><td align="left" valign="top">N/A</td><td align="left" valign="top">Textual descriptions of emotional state within dialog (eg, expressing stress or motivation). No visual or auditory emotion simulation</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>OpenAI ChatGPT 3.5</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Patient simulation: to act as a smoker seeking to quit, responding in character to student-led counseling based on a predefined case scenario that includes demographics, smoking habits, and motivations.</p></list-item><list-item><p>Performance feedback: after the counseling session, to evaluate the student&#x2019;s performance based on a structured rubric (the 5As<sup><xref ref-type="table-fn" rid="table2fn8">h</xref></sup> framework, empathy, communication skills, etc) and provide detailed textual feedback on strengths and areas for improvement.</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>The 5As framework for smoking cessation counseling</p></list-item></list></td></tr><tr><td align="left" valign="top">[<xref ref-type="bibr" rid="ref37">37</xref>]</td><td align="left" valign="top">Text</td><td align="left" 
valign="top"><list list-type="bullet"><list-item><p>Text</p></list-item></list></td><td align="left" valign="top">N/A</td><td align="left" valign="top">N/A</td><td align="left" valign="top">Textual descriptions of the patient&#x2019;s emotional state within dialog (eg, expressing anxiety or distress). No visual or auditory emotion simulation</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>OpenAI GPT-4</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Patient simulation: to act as a simulated patient (eg, a man aged 28 years with depression or a woman aged 46 years with agoraphobia) and respond in character to physician-led text-based dialog.</p></list-item><list-item><p>Real-time feedback generation: to analyze the physician&#x2019;s incoming text messages in real-time, identify the use of specific communication techniques (eg, open questions, reflections, empathy, or validation), and provide immediate formative textual feedback within the chat to confirm and encourage technique use.</p></list-item><list-item><p>Summative feedback generation: after the chat, to provide summarized feedback on the frequency of technique use, highlight underused techniques, and give examples for future application.</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>N/A</p></list-item></list></td></tr><tr><td align="left" valign="top">[<xref ref-type="bibr" rid="ref44">44</xref>]</td><td align="left" valign="top">Voice (learner&#x2019;s spoken questions to the virtual patient via HMD<sup><xref ref-type="table-fn" rid="table2fn9">i</xref></sup>, processed by a speech-to-text model)</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Voice (virtual patient&#x2019;s spoken responses generated by the AI, delivered via HMD with an AI-generated voice)</p></list-item></list></td><td align="left" valign="top">3D</td><td align="left" valign="top">Limited (the 
virtual human avatar is present but has limited physical interaction; cannot perform actions such as raising clothes or turning over as requested)</td><td align="left" valign="top">Limited (primarily text-based emotional cues within dialog. This study notes limitations such as unnatural voice expressiveness and an absence of emotional sentiment)</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>OpenAI GPT-4o</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Patient simulation: to act as a virtual patient (a male aged 28 years with acute appendicitis) and engage in real-time, natural language dialog with nursing students for health assessment and communication training. The AI is provided with a detailed patient script and vital sign data to generate contextually relevant and medically accurate responses to the learner&#x2019;s verbal questions.</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Instructional design model for AI education</p></list-item><list-item><p>Technology acceptance model (for evaluation)</p></list-item></list></td></tr><tr><td align="left" valign="top">[<xref ref-type="bibr" rid="ref45">45</xref>]</td><td align="left" valign="top">Voice</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Voice</p></list-item></list></td><td align="left" valign="top">Visual avatar (image of patient&#x2019;s eye; 2D/3D not specified)</td><td align="left" valign="top">N/A</td><td align="left" valign="top">Expressed through voice (eg, anxiety or emotional responses)</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Fine-tuned Baichuan-13B-Chat (a large language model)</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>To simulate a digital ophthalmology patient for medical history-taking practice. 
The AI acts as the patient, responding in character to students&#x2019; verbal inquiries based on a detailed knowledge base derived from electronic health records. The system provides real-time interaction and, after the session, generates automated feedback and scores based on the comprehensiveness of the history taken.</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Kolb&#x2019;s experiential learning cycle</p></list-item><list-item><p>Calgary-Cambridge communication framework</p></list-item></list></td></tr><tr><td align="left" valign="top">[<xref ref-type="bibr" rid="ref46">46</xref>]</td><td align="left" valign="top">Voice</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Voice</p></list-item></list></td><td align="left" valign="top">3D</td><td align="left" valign="top">Not specified (avatar is static in the examination room; no description of physical movement)</td><td align="left" valign="top">Facial expressions rendered in real-time by the D-ID<sup><xref ref-type="table-fn" rid="table2fn10">j</xref></sup> platform to create a full audiovisual experience; tone of voice (eg, irritable or rapport-building) also conveys emotion</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>OpenAI GPT-4o (via API<sup><xref ref-type="table-fn" rid="table2fn11">k</xref></sup> calls)</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Patient simulation: to act as a virtual patient (&#x201C;Randy Rhodes,&#x201D; a man aged 54 years with type 2 diabetes) for medical students to interview via voice-to-voice interaction. Custom agent instructions are informed by faculty-generated case materials. &#x201C;Guardrails&#x201D; are placed to optimize educational value (eg, preventing the AI from revealing the diagnosis directly or ensuring accurate presentation of pertinent positive findings). 
Within these limits, the AI is allowed to respond adaptively to students&#x2019; questions to maintain realism.</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>N/A</p></list-item></list></td></tr><tr><td align="left" valign="top">[<xref ref-type="bibr" rid="ref47">47</xref>]</td><td align="left" valign="top">Text</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Text</p></list-item></list></td><td align="left" valign="top">N/A</td><td align="left" valign="top">N/A</td><td align="left" valign="top">Textual descriptions of patient emotional state are generated based on integrated personality profiles (eg, the Big Five framework) to simulate emotional realism (eg, pain and anxiety). No visual or auditory emotion simulation</td><td align="left" valign="top">Claude models (5):<list list-type="bullet"><list-item><p>Claude3 Haiku</p></list-item><list-item><p>Claude-3-Sonnet</p></list-item><list-item><p>Claude-3&#x2010;5 Sonnet</p></list-item><list-item><p>Claude-4-Sonnet</p></list-item><list-item><p>Claude-4-Opus</p></list-item></list><break/>GPT-family models (3):<list list-type="bullet"><list-item><p>GPT-4 Turbo</p></list-item><list-item><p>GPT-4o</p></list-item><list-item><p>GPT-3.5 Turbo</p></list-item></list><break/>Open-source models (3):<list list-type="bullet"><list-item><p>DeepSeekV3 671B</p></list-item><list-item><p>Qwen3-32B</p></list-item><list-item><p>LLaMa-3 70B</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>To act as a simulated patient (&#x201C;AIPatient&#x201D;) based on real EHR<sup><xref ref-type="table-fn" rid="table2fn12">l</xref></sup> data from the MIMIC-III<sup><xref ref-type="table-fn" rid="table2fn13">m</xref></sup> database. The AI engages in text-based dialog with medical students for history-taking practice. 
Its task is to provide accurate, readable, and consistent responses to clinical questions while incorporating diverse personality traits to simulate realistic patient behavior, including emotional expressions.</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>N/A</p></list-item></list></td></tr></tbody></table><table-wrap-foot><fn id="table2fn1"><p><sup>a</sup>GenAI: generative artificial intelligence.</p></fn><fn id="table2fn2"><p><sup>b</sup>AI: artificial intelligence.</p></fn><fn id="table2fn3"><p><sup>c</sup>N/A: not applicable.</p></fn><fn id="table2fn4"><p><sup>d</sup>MRI: magnetic resonance imaging.</p></fn><fn id="table2fn5"><p><sup>e</sup>ORIF: open reduction and internal fixation.</p></fn><fn id="table2fn6"><p><sup>f</sup>VP: virtual patient.</p></fn><fn id="table2fn7"><p><sup>g</sup>MFR: medical first responder.</p></fn><fn id="table2fn8"><p><sup>h</sup>5A: Ask, Advise, Assess, Assist, Arrange.</p></fn><fn id="table2fn9"><p><sup>i</sup>HMD: head-mounted display.</p></fn><fn id="table2fn10"><p><sup>j</sup>D-ID: deidentification.</p></fn><fn id="table2fn11"><p><sup>k</sup>API: application programming interface.</p></fn><fn id="table2fn12"><p><sup>l</sup>EHR: electronic health record.</p></fn><fn id="table2fn13"><p><sup>m</sup>MIMIC-III: Medical Information Mart for Intensive Care III.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s3-5"><title>Design Choices</title><p>The designs of GenAI-supported virtual patients can be classified into three distinct categories: (1) input, (2) output, and (3) avatar. 
First, in terms of input methods, 9 of 15 studies reported that participants interacted with the virtual patient by entering text [<xref ref-type="bibr" rid="ref33">33</xref>-<xref ref-type="bibr" rid="ref35">35</xref>,<xref ref-type="bibr" rid="ref37">37</xref>,<xref ref-type="bibr" rid="ref39">39</xref>-<xref ref-type="bibr" rid="ref41">41</xref>,<xref ref-type="bibr" rid="ref43">43</xref>,<xref ref-type="bibr" rid="ref47">47</xref>]. Meanwhile, 5 studies allowed participants to communicate verbally using voice input [<xref ref-type="bibr" rid="ref36">36</xref>,<xref ref-type="bibr" rid="ref38">38</xref>,<xref ref-type="bibr" rid="ref44">44</xref>-<xref ref-type="bibr" rid="ref46">46</xref>]. Additionally, 1 study incorporated a hybrid approach, enabling participants to use either speech-to-text functionality via a microphone or direct text input through a designated field [<xref ref-type="bibr" rid="ref42">42</xref>]. Next, the output modalities of the virtual patient corresponded closely to the input mechanisms. In 9 studies, the virtual patient responded to participants via text-based communication [<xref ref-type="bibr" rid="ref33">33</xref>-<xref ref-type="bibr" rid="ref35">35</xref>,<xref ref-type="bibr" rid="ref37">37</xref>,<xref ref-type="bibr" rid="ref39">39</xref>-<xref ref-type="bibr" rid="ref41">41</xref>,<xref ref-type="bibr" rid="ref43">43</xref>,<xref ref-type="bibr" rid="ref47">47</xref>]. In contrast, 5 studies featured virtual patients capable of generating human-like voice responses [<xref ref-type="bibr" rid="ref36">36</xref>,<xref ref-type="bibr" rid="ref38">38</xref>,<xref ref-type="bibr" rid="ref44">44</xref>-<xref ref-type="bibr" rid="ref46">46</xref>]. 
Notably, 1 study highlighted a more versatile approach, where the virtual patient was designed to provide responses in both text and synthesized speech [<xref ref-type="bibr" rid="ref42">42</xref>].</p><p>Regarding the avatar design of the virtual patient in the included studies, 6 studies used a 3D-embodied virtual patient to enhance realism and immersion for participants [<xref ref-type="bibr" rid="ref36">36</xref>,<xref ref-type="bibr" rid="ref38">38</xref>,<xref ref-type="bibr" rid="ref42">42</xref>,<xref ref-type="bibr" rid="ref44">44</xref>-<xref ref-type="bibr" rid="ref46">46</xref>]. For instance, a study [<xref ref-type="bibr" rid="ref38">38</xref>] integrated a mixed reality tool, allowing participants not only to visually perceive the virtual patient within a digital environment but also to physically interact with a corresponding manikin. This setup enabled the virtual patient to display various injuries, movements, and facial expressions aligned with speech production, respiration patterns, and pain-related vocalizations. Similarly, research by Borg et al [<xref ref-type="bibr" rid="ref36">36</xref>] implemented a virtual patient embedded within a robotic system. Their robot featured a 3-degree-of-freedom neck and an animated face, facilitating flexible head movements and expressive emotional displays. Mool et al [<xref ref-type="bibr" rid="ref46">46</xref>] also used a 3D avatar, though it was noted to be largely static within the examination room environment. 
In contrast, the remaining 9 studies used virtual patients without avatars, relying solely on alternative interaction modalities [<xref ref-type="bibr" rid="ref33">33</xref>-<xref ref-type="bibr" rid="ref37">37</xref>,<xref ref-type="bibr" rid="ref39">39</xref>-<xref ref-type="bibr" rid="ref41">41</xref>,<xref ref-type="bibr" rid="ref43">43</xref>,<xref ref-type="bibr" rid="ref47">47</xref>].</p><p>In the 15 included studies, interventions involving participants interacting with GenAI-supported virtual patients lasted less than 10 minutes in 5 cases [<xref ref-type="bibr" rid="ref33">33</xref>,<xref ref-type="bibr" rid="ref38">38</xref>-<xref ref-type="bibr" rid="ref41">41</xref>]. Three studies reported intervention durations exceeding 20 minutes [<xref ref-type="bibr" rid="ref43">43</xref>-<xref ref-type="bibr" rid="ref45">45</xref>], while the remaining 5 did not specify the duration [<xref ref-type="bibr" rid="ref34">34</xref>-<xref ref-type="bibr" rid="ref37">37</xref>,<xref ref-type="bibr" rid="ref42">42</xref>]. Moreover, 7 studies featured only a single session, regardless of the intervention length [<xref ref-type="bibr" rid="ref33">33</xref>,<xref ref-type="bibr" rid="ref35">35</xref>,<xref ref-type="bibr" rid="ref38">38</xref>,<xref ref-type="bibr" rid="ref39">39</xref>,<xref ref-type="bibr" rid="ref42">42</xref>,<xref ref-type="bibr" rid="ref44">44</xref>,<xref ref-type="bibr" rid="ref45">45</xref>].</p></sec><sec id="s3-6"><title>Technological Architecture</title><p>A total of 13 studies explicitly stated that they used an OpenAI GPT model for generating the virtual patient [<xref ref-type="bibr" rid="ref33">33</xref>-<xref ref-type="bibr" rid="ref44">44</xref>,<xref ref-type="bibr" rid="ref46">46</xref>]. Further, 1 study used a fine-tuned model from a different provider [<xref ref-type="bibr" rid="ref45">45</xref>]. 
Yu et al [<xref ref-type="bibr" rid="ref47">47</xref>] evaluated a broader range of models for their AIPatient system, including Claude models (Haiku, Sonnet variants), GPT-family models (GPT-4 Turbo, GPT-4o, and GPT-3.5 Turbo), and open-source models (DeepSeekV3 671B, Qwen3-32B, and LLaMa-3 70B). All studies specified the foundational GenAI model used. A total of 14 studies provided detailed patient case information as part of the prompt to enhance the virtual patient&#x2019;s responses [<xref ref-type="bibr" rid="ref33">33</xref>-<xref ref-type="bibr" rid="ref38">38</xref>,<xref ref-type="bibr" rid="ref40">40</xref>-<xref ref-type="bibr" rid="ref47">47</xref>]. For instance, in a study [<xref ref-type="bibr" rid="ref36">36</xref>], the prompt consisted of a structured patient case description, the previous 10 turns of dialog, and an instruction to generate the next line of conversation. Yu et al [<xref ref-type="bibr" rid="ref47">47</xref>] took a sophisticated approach, integrating real electronic health record data from the MIMIC-III (Medical Information Mart for Intensive Care III) database and incorporating personality profiles based on the Big Five framework to simulate diverse and realistic patient behaviors. In contrast, 1 study used a simpler prompt, wherein the GenAI was instructed merely to assume the role of a virtual patient with a specified condition (eg, respiratory distress) and engage in dialog with participants acting as nurses, without requiring additional case-specific details [<xref ref-type="bibr" rid="ref39">39</xref>].</p></sec><sec id="s3-7"><title>Educational Strategies</title><p>A total of 3 studies explicitly referenced established educational frameworks to inform their design. The study by Kim et al [<xref ref-type="bibr" rid="ref44">44</xref>] applied an instructional design model for AI education and the technology acceptance model. 
The study by Luo et al [<xref ref-type="bibr" rid="ref45">45</xref>] used Kolb&#x2019;s experiential learning cycle and the Calgary-Cambridge communication framework, while the study by Chinwong et al [<xref ref-type="bibr" rid="ref43">43</xref>] was grounded in the 5As framework for smoking cessation counseling. The study by Kim et al [<xref ref-type="bibr" rid="ref44">44</xref>] emphasized the critical role of patient case design, noting that such cases serve as the foundational structure of the curriculum, functioning as learning triggers and providing a platform for students to engage in cognitive processes reflective of physicians&#x2019; workplace reasoning. Given the high-fidelity patient simulation and the level of control afforded by GenAI, this innovative approach has the potential to enhance medical education curricula, offering valuable benefits for both students and educators. In contrast, the remaining 12 studies did not specify the application of any educational theory to inform the design of the virtual patient or the overall study methodology [<xref ref-type="bibr" rid="ref33">33</xref>-<xref ref-type="bibr" rid="ref42">42</xref>,<xref ref-type="bibr" rid="ref46">46</xref>,<xref ref-type="bibr" rid="ref47">47</xref>].</p><p>To address the RQ2 concerning the educational effectiveness and learner outcomes of GenAI-supported virtual patients, the following sections analyze the extracted data, which are comprehensively tabulated in <xref ref-type="table" rid="table3">Table 3</xref>.</p><table-wrap id="t3" position="float"><label>Table 3.</label><caption><p>Study design, educational purpose, intervention details, measured outcomes, and primary results of the 15 included studies.</p></caption><table id="table3" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Reference citation</td><td align="left" valign="bottom">Study design<break/>(participants, n)</td><td align="left" valign="bottom">Educational purpose</td><td align="left" 
valign="bottom">Session (duration), n</td><td align="left" valign="bottom">Outcomes</td><td align="left" valign="bottom">Validity or reliability test</td><td align="left" valign="bottom">Results of intervention</td></tr></thead><tbody><tr><td align="left" valign="top">[<xref ref-type="bibr" rid="ref39">39</xref>]</td><td align="left" valign="top">Cross-sectional study (n=12)</td><td align="left" valign="top">Patient communication</td><td align="left" valign="top">1 (10 min)</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Ease of use of ChatGPT</p></list-item><list-item><p>Learning engagement with ChatGPT</p></list-item><list-item><p>Recognition of the usefulness of ChatGPT in clinical education</p></list-item><list-item><p>Performance in virtual patient interaction</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>N/A<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup></p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Students responded positively to ChatGPT, finding it accessible, engaging, and valuable as a training tool. Those with stronger interaction skills tended to perform better overall. 
Key communication attributes such as clarity, relevance, and usefulness were linked to stronger outcomes.</p></list-item></list></td></tr><tr><td align="left" valign="top">[<xref ref-type="bibr" rid="ref40">40</xref>]</td><td align="left" valign="top">Quasi-experimental study within design (n=6)</td><td align="left" valign="top">Radiographers&#x2019; communication skills with patients with claustrophobia</td><td align="left" valign="top">10 (2 min)</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Simulation success rate</p></list-item><list-item><p>Radiographers&#x2019; communication skills</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>N/A</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>A total of 60 simulations were conducted, achieving a success rate of 96.7% (58/60). ChatGPT-3.5 exhibited errors in 40% (12/30) of the simulations, while ChatGPT-4 showed no errors. The simulation of clinical scenarios via ChatGPT proves valuable in assessing and testing radiographers&#x2019; communication skills, especially in managing patients with claustrophobia during MRI.<sup><xref ref-type="table-fn" rid="table3fn2">b</xref></sup></p></list-item></list></td></tr><tr><td align="left" valign="top">[<xref ref-type="bibr" rid="ref33">33</xref>]</td><td align="left" valign="top">Cross-sectional study (n=106)</td><td align="left" valign="top">Patient history taking</td><td align="left" valign="top">1 (8 minutes)</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Quality of OpenAI GPT-4&#x2019;s role-play capability</p></list-item><list-item><p>Completeness of history taking</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Interrater reliability, measured by Cohen &#x03BA;</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>OpenAI GPT-4 demonstrated 
highly realistic medical responses, with over 99% deemed plausible. Its evaluations closely matched human ratings overall, though some feedback categories showed weaker agreement where OpenAI GPT-4&#x2019;s assessments were more detailed or differed from human perspectives.</p></list-item></list></td></tr><tr><td align="left" valign="top">[<xref ref-type="bibr" rid="ref34">34</xref>]</td><td align="left" valign="top">Cross-sectional study (n=28)</td><td align="left" valign="top">Patient history taking for medical students</td><td align="left" valign="top">N/A</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>The performance of OpenAI GPT as a simulated patient</p></list-item><list-item><p>Chatbot&#x2019;s usability</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>N/A</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>When questions were explicitly covered by the script (n=502, 60.3%), the GPT-provided answers were mostly based on explicit script information (n=471, 94.4%).</p></list-item></list></td></tr><tr><td align="left" valign="top">[<xref ref-type="bibr" rid="ref35">35</xref>]</td><td align="left" valign="top">Quasi-experimental study (intervention group n=35, control group n=110)</td><td align="left" valign="top">Medical students&#x2019; interview skills</td><td align="left" valign="top">1 (N/A)</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>The scores related to medical interviewing in the pre-CC<sup><xref ref-type="table-fn" rid="table3fn3">c</xref></sup> OSCE<sup><xref ref-type="table-fn" rid="table3fn4">d</xref></sup></p></list-item><list-item><p>Simulation-based training quality</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>N/A</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Students in the AI<sup><xref 
ref-type="table-fn" rid="table3fn5">e</xref></sup>-supported group performed better in medical interviews compared to those in the control group. An inverse relationship was noted between their self-reported confidence scores and earlier examination results. Importantly, no safety issues were identified throughout the study.</p></list-item></list></td></tr><tr><td align="left" valign="top">[<xref ref-type="bibr" rid="ref41">41</xref>]</td><td align="left" valign="top">Randomized controlled trial (control group n=11, feedback group n=10)</td><td align="left" valign="top">Clinical decision-making in medical students</td><td align="left" valign="top">4 (6 min)</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>The performance of the participants</p></list-item><list-item><p>Clinical reasoning ability</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>N/A</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Medical students showed notable improvement when provided feedback. Initially, both the feedback and control groups performed similarly, confirming balanced assignment. By the end, the feedback group scored significantly higher overall, particularly in creating context and gathering information during clinical decision-making. 
However, there was no marked progress in their question-focusing skills.</p></list-item></list></td></tr><tr><td align="left" valign="top">[<xref ref-type="bibr" rid="ref42">42</xref>]</td><td align="left" valign="top">Cross-sectional study (n=15)</td><td align="left" valign="top">Anesthesia training</td><td align="left" valign="top">1 (N/A)</td><td align="left" valign="top">Students&#x2019; perception of the virtual patient:<list list-type="bullet"><list-item><p>Intuitive</p></list-item><list-item><p>User-friendly</p></list-item><list-item><p>Accuracy</p></list-item><list-item><p>Usability (use the model comfortably)</p></list-item><list-item><p>Feasibility</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>N/A</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>The survey of 15 anesthetists revealed that the tool was generally well received. It had a median rating of 9 out of 10 for how intuitive and user-friendly it was, and a score of 8 out of 10 for simulating realistic patient responses and behaviors. Furthermore, 87% of the participants reported feeling comfortable using the model, suggesting strong confidence in its design and functionality. 
It seems the tool succeeded in both usability and clinical accuracy.</p></list-item></list></td></tr><tr><td align="left" valign="top">[<xref ref-type="bibr" rid="ref38">38</xref>]</td><td align="left" valign="top">Cross-sectional study (n=24)</td><td align="left" valign="top">Communication training in an emergency (ie, car accident)</td><td align="left" valign="top">1 (6&#x2010;10 min)</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Perception of voice quality</p></list-item><list-item><p>Usability of voice interactions</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>N/A</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>The usability assessment of the virtual patient yielded moderately positive feedback, with particularly favorable scores in habitability and likeability. However, the roughly 3-second delay in response time detracted from the fluidity of interactions. MFRs<sup><xref ref-type="table-fn" rid="table3fn6">f</xref></sup> found it natural to evaluate the virtual patient&#x2019;s physiological state through verbal questions, but they also noted limitations in the dialog flow, especially the virtual patient&#x2019;s inability to initiate conversation. 
A key insight emerged around the potential of using domain-specific prompt engineering to guide responders more effectively during training.</p></list-item></list></td></tr><tr><td align="left" valign="top">[<xref ref-type="bibr" rid="ref36">36</xref>]</td><td align="left" valign="top">Quasi-experimental study within design (n=15)</td><td align="left" valign="top">CR<sup><xref ref-type="table-fn" rid="table3fn7">g</xref></sup> training in rheumatology, comparing a social robotic VP<sup><xref ref-type="table-fn" rid="table3fn8">h</xref></sup> platform (LLM<sup><xref ref-type="table-fn" rid="table3fn9">i</xref></sup>-enhanced) to a conventional computer-based platform</td><td align="left" valign="top">1 VP case per platform (order counterbalanced). Duration not specified</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Virtual patient evaluation</p></list-item><list-item><p>Qualitative experiences of clinical reasoning, communication, and emotional skill training</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>N/A</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Quantitative: the social robotic platform was rated significantly higher for authenticity (mean 4.5 vs 3.9, <italic>P</italic>=.04) and overall learning effect (mean 4.4 vs 4.1, <italic>P</italic>=.01).</p></list-item><list-item><p>Qualitative: students found the robot superior for training CR, communication, and emotional skills, despite noting technical limitations.</p></list-item></list></td></tr><tr><td align="left" valign="top">[<xref ref-type="bibr" rid="ref43">43</xref>]</td><td align="left" valign="top">Single group quasi-experimental, prepost (n=145)</td><td align="left" valign="top">To practice smoking cessation counseling using the 5As<sup><xref ref-type="table-fn" rid="table3fn10">j</xref></sup> framework with an AI-simulated patient</td><td align="left" valign="top">Practice over 
3 weeks (unrestricted frequency or duration), followed by a 2-hour classroom discussion session</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Student satisfaction</p></list-item><list-item><p>Perceived learning impact</p></list-item><list-item><p>Perceived benefits</p></list-item><list-item><p>Perceived difficulties</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>N/A</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>66% of students were satisfied. Further, 84.4% reported improved understanding. Key benefits included self-assessment and adaptability. Major challenges were technical issues (88.3%) and a lack of AI understanding (58.6%).</p></list-item></list></td></tr><tr><td align="left" valign="top">[<xref ref-type="bibr" rid="ref37">37</xref>]</td><td align="left" valign="top">Proof-of-concept observational study (n=28)</td><td align="left" valign="top">To train communication techniques (eg, empathy and motivational interviewing) for mental health encounters using an AI chatbot with real-time feedback</td><td align="left" valign="top">2 chat sessions (20 minutes each) with 2 different AI-simulated patients</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Accuracy of AI-generated feedback (expert-evaluated)</p></list-item><list-item><p>Participant perception of feedback</p></list-item><list-item><p>Change in frequency of communication techniques</p></list-item><list-item><p>Perceived benefit for clinical practice</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>N/A</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>85.38% of real-time feedback was partially or totally correct. Further, 87.27% of participants found the feedback helpful. 
A significant increase in the use of targeted techniques was observed from chat 1 to chat 2 (Poisson regression, <italic>P</italic>&#x003C;.001). Over 80% agreed that the training helped them practice and apply new techniques.</p></list-item></list></td></tr><tr><td align="left" valign="top">[<xref ref-type="bibr" rid="ref44">44</xref>]</td><td align="left" valign="top">Single group quasi-experimental, prepost (n=28)</td><td align="left" valign="top">To train health assessment and therapeutic communication skills for patients with acute appendicitis using a GPT-based VP in VR<sup><xref ref-type="table-fn" rid="table3fn11">k</xref></sup></td><td align="left" valign="top">1 session (1 hour total for interaction and practice)</td><td align="left" valign="top">Quantitative:<list list-type="bullet"><list-item><p>Usability</p></list-item><list-item><p>Perceived virtual learning environment (immersion, usefulness, etc)</p></list-item><list-item><p>Self-efficacy in communication</p></list-item></list><break/>Qualitative:<list list-type="bullet"><list-item><p>Training experiences</p></list-item><list-item><p>Accuracy of AI dialogs</p></list-item><list-item><p>Safety of AI dialogs</p></list-item><list-item><p>Relevance of AI dialogs</p></list-item><list-item><p>Readability of AI dialogs</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Reliability tested via Cronbach &#x03B1; for all scales: usability (&#x03B1;=.85), perceived virtual learning environment (&#x03B1;=.95), self-efficacy of communication (&#x03B1;=.88).</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Self-efficacy in communication increased significantly (pre: 61.57, post: 64.32, <italic>P</italic>=.009). The highest scores were for immersion and function accessibility. Qualitative themes highlighted educational benefits and technical limitations. 
AI dialog scored highest on readability and lowest on accuracy.</p></list-item></list></td></tr><tr><td align="left" valign="top">[<xref ref-type="bibr" rid="ref45">45</xref>]</td><td align="left" valign="top">RCT<sup><xref ref-type="table-fn" rid="table3fn12">l</xref></sup> (LLMDP<sup><xref ref-type="table-fn" rid="table3fn13">m</xref></sup> group n=13, control group n=13)</td><td align="left" valign="top">To enhance ophthalmology medical history-taking skills using an LLM-based digital patient system</td><td align="left" valign="top">1 (1 h)</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Medical history-taking ability</p></list-item><list-item><p>Empathy</p></list-item><list-item><p>Student attitudes or satisfaction</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>N/A</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>The LLMDP group showed significantly higher MHTA<sup><xref ref-type="table-fn" rid="table3fn14">n</xref></sup> scores (mean 64.62, SD 9.52) vs control (54.12, SD 8.80), mean difference 10.50 points (95% CI 4.66&#x2010;16.33, <italic>P</italic>&#x003C;.001). The intervention group also demonstrated better empathy. 
High student satisfaction was reported, highlighting benefits for confidence and cost or time savings.</p></list-item></list></td></tr><tr><td align="left" valign="top">[<xref ref-type="bibr" rid="ref46">46</xref>]</td><td align="left" valign="top">Between-groups, mixed methods study (GenAI<sup><xref ref-type="table-fn" rid="table3fn15">o</xref></sup> groups n=13, ePBLM<sup><xref ref-type="table-fn" rid="table3fn16">p</xref></sup> group n=13)</td><td align="left" valign="top">To explore what happens when a GenAI-enabled virtual patient is introduced within a PBL<sup><xref ref-type="table-fn" rid="table3fn17">q</xref></sup> tutorial for history-taking, compared to a legacy multimedia database system (ePBLM)</td><td align="left" valign="top">1 (GenAI groups: 55&#x2010;65 min; ePBLM groups: 36&#x2010;39 min)</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Primary (observational): characterization of student interactions with the patient modality and with each other during history-taking (via descriptive observation of audio-recordings), secondary (survey or quiz)</p></list-item><list-item><p>Learner perceptions: 8-item survey (5-point Likert) assessing perceptions of PBL tutorial quality (clinical accuracy, enjoyability, teamwork, etc)</p></list-item><list-item><p>Patient history recall: 11-item short-answer quiz assessing recall of patient case information (immediate and 2-week delayed)</p></list-item></list></td><td align="left" valign="top">Validity:<list list-type="bullet"><list-item><p>Survey items were judged by a behavioral scientist to be typical of those in other simulated patient studies.</p></list-item><list-item><p>Quiz items were verified by 2 faculty members for consistency with the case and readability.</p></list-item></list><break/>Reliability:<list list-type="bullet"><list-item><p>Quiz grading: first 5 quizzes graded independently by 2 coinvestigators to establish consistency; remaining 52 quizzes graded by 1 
investigator.</p></list-item><list-item><p>Statistical tests: linear regression (OLS<sup><xref ref-type="table-fn" rid="table3fn18">r</xref></sup> estimation), 2-tailed with &#x03B1;=.05.</p></list-item></list></td><td align="left" valign="top">Observational findings:<list list-type="bullet"><list-item><p>GenAI presented essential case content accurately but occasionally deviated on nonessential content (eg, embellished responses, inconsistent headache history, and unreported marijuana use).</p></list-item><list-item><p>GenAI groups took &#x2248;10 minutes longer on history-taking, partly due to collaborative troubleshooting of AI interaction.</p></list-item><list-item><p>Students treated the avatar like a sophisticated &#x201C;question base,&#x201D; using closed-ended questions and jargon, not realistic patient interviewing.</p></list-item><list-item><p>One GenAI group showed more experimental, anthropomorphizing engagement (eg, using the patient&#x2019;s name and inferring attitude).</p></list-item></list><break/>Survey results:<list list-type="bullet"><list-item><p>GenAI students rated their experience significantly higher than their prior ePBLM experiences (mean total score 34.38 vs 28.38 pretutorial, <italic>P</italic>=.003).</p></list-item><list-item><p>Largest gains were in &#x201C;simulates clinical experiences accurately&#x201D; (mean increase of 1.6 points).</p></list-item></list><break/>Quiz results:<list list-type="bullet"><list-item><p>Immediate recall was near ceiling in both groups (GenAI: 10.10/11; ePBLM: 9.40/11).</p></list-item><list-item><p>Delayed recall (2 weeks) decreased significantly in both groups (GenAI: 8.63; ePBLM: 7.94), but the rate of forgetting did not differ by condition (<italic>P</italic>=.052 for condition effect).</p></list-item></list></td></tr><tr><td align="left" valign="top">[<xref ref-type="bibr" rid="ref47">47</xref>]</td><td align="left" valign="top">Paired crossover study (n=20 medical students)</td><td align="left" 
valign="top">To evaluate the fidelity, usability, and educational effectiveness of the AIPatient<sup><xref ref-type="table-fn" rid="table3fn19">s</xref></sup> system compared to H-SPs<sup><xref ref-type="table-fn" rid="table3fn20">t</xref></sup> in medical history-taking</td><td align="left" valign="top">4 interactions per student (2 cases&#x00D7;2 modalities: AIPatient and H-SP). Duration not explicitly specified</td><td align="left" valign="top">Primary (system performance):<list list-type="bullet"><list-item><p>Knowledgebase validity: NER<sup><xref ref-type="table-fn" rid="table3fn21">u</xref></sup> <italic>F</italic><sub>1</sub>-score</p></list-item><list-item><p>QA<sup><xref ref-type="table-fn" rid="table3fn22">v</xref></sup> accuracy: percentage correct in EHR<sup><xref ref-type="table-fn" rid="table3fn23">w</xref></sup>-based QA</p></list-item><list-item><p>Readability: Flesch Reading Ease, Flesch-Kincaid Grade Level</p></list-item><list-item><p>Robustness: accuracy variance with paraphrased questions (ANOVA)</p></list-item><list-item><p>Stability: accuracy variance with 32 personality types (ANOVA and data loss percentage)</p></list-item></list><break/>Secondary (user study):<list list-type="bullet"><list-item><p>Fidelity, usability, and educational effectiveness: measured via 5-point Likert-scale questionnaire</p></list-item><list-item><p>Clinical information gathering: OSCE-style checklist</p></list-item><list-item><p>Qualitative feedback: semistructured interviews</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Intercoder reliability: <italic>F</italic><sub>1</sub>-score (0.79) for NER gold-standard labels; Cohen &#x03BA; (0.92) for QA accuracy ratings</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>AIPatient matched or exceeded H-SPs across most metrics.</p></list-item><list-item><p>Significant advantages: emotional realism (4.37 vs 3.74, 
<italic>P</italic>&#x003C;.01), technical reliability (4.39 vs 3.79, <italic>P</italic>&#x003C;.01), improving clinical reasoning skills (4.41 vs 3.97, <italic>P</italic>&#x003C;.05).</p></list-item><list-item><p>OSCE checklist: AIPatient performed comparably or better in supporting clinical reasoning and information elicitation.</p></list-item><list-item><p>Qualitative: students found AIPatient emotionally expressive, pedagogically valuable, efficient, consistent, and usable. Identified areas for improvement included verbosity and handling of nonstandard queries.</p></list-item></list></td></tr></tbody></table><table-wrap-foot><fn id="table3fn1"><p><sup>a</sup>N/A: not applicable.</p></fn><fn id="table3fn2"><p><sup>b</sup>MRI: magnetic resonance imaging.</p></fn><fn id="table3fn3"><p><sup>c</sup>Pre-CC: preclinical clerkship.</p></fn><fn id="table3fn4"><p><sup>d</sup>OSCE: objective structured clinical examination.</p></fn><fn id="table3fn5"><p><sup>e</sup>AI: artificial intelligence.</p></fn><fn id="table3fn6"><p><sup>f</sup>MFR: medical first responder.</p></fn><fn id="table3fn7"><p><sup>g</sup>CR: clinical reasoning.</p></fn><fn id="table3fn8"><p><sup>h</sup>VP: virtual patient.</p></fn><fn id="table3fn9"><p><sup>i</sup>LLM: large language model.</p></fn><fn id="table3fn10"><p><sup>j</sup>5As: Ask, Advise, Assess, Assist, Arrange.</p></fn><fn id="table3fn11"><p><sup>k</sup>VR: virtual reality.</p></fn><fn id="table3fn12"><p><sup>l</sup>RCT: randomized controlled trial. </p></fn><fn id="table3fn13"><p><sup>m</sup>LLMDP: large language model-based digital patient.</p></fn><fn id="table3fn14"><p><sup>n</sup>MHTA: medical history-taking ability. 
</p></fn><fn id="table3fn15"><p><sup>o</sup>GenAI: generative artificial intelligence.</p></fn><fn id="table3fn16"><p><sup>p</sup>ePBLM: electronic problem-based learning.</p></fn><fn id="table3fn17"><p><sup>q</sup>PBL: problem-based learning.</p></fn><fn id="table3fn18"><p><sup>r</sup>OLS: ordinary least squares.</p></fn><fn id="table3fn19"><p><sup>s</sup>AIPatient: artificial intelligence patient.</p></fn><fn id="table3fn20"><p><sup>t</sup>H-SP: human-simulated patient. </p></fn><fn id="table3fn21"><p><sup>u</sup>NER: named entity recognition.</p></fn><fn id="table3fn22"><p><sup>v</sup>QA: question answering.</p></fn><fn id="table3fn23"><p><sup>w</sup>EHR: electronic health record.</p></fn></table-wrap-foot></table-wrap><sec id="s3-7-1"><title>Educational Benefits and Learner Outcomes</title><p>Among all 15 included studies, 14 assessed participants&#x2019; perceptions of the GenAI-supported virtual patient, examining factors such as usefulness [<xref ref-type="bibr" rid="ref39">39</xref>], accuracy [<xref ref-type="bibr" rid="ref42">42</xref>,<xref ref-type="bibr" rid="ref44">44</xref>], and the authenticity of the patient encounter [<xref ref-type="bibr" rid="ref36">36</xref>,<xref ref-type="bibr" rid="ref37">37</xref>,<xref ref-type="bibr" rid="ref46">46</xref>,<xref ref-type="bibr" rid="ref47">47</xref>]. 
Additionally, 11 studies investigated the impact of GenAI-supported virtual patients on participants&#x2019; learning outcomes, including performance [<xref ref-type="bibr" rid="ref33">33</xref>-<xref ref-type="bibr" rid="ref35">35</xref>,<xref ref-type="bibr" rid="ref39">39</xref>,<xref ref-type="bibr" rid="ref41">41</xref>,<xref ref-type="bibr" rid="ref45">45</xref>,<xref ref-type="bibr" rid="ref47">47</xref>], communication skills [<xref ref-type="bibr" rid="ref38">38</xref>,<xref ref-type="bibr" rid="ref40">40</xref>,<xref ref-type="bibr" rid="ref44">44</xref>,<xref ref-type="bibr" rid="ref46">46</xref>], and clinical reasoning ability [<xref ref-type="bibr" rid="ref36">36</xref>,<xref ref-type="bibr" rid="ref41">41</xref>,<xref ref-type="bibr" rid="ref47">47</xref>]. Across multiple studies, GenAI-supported virtual patients demonstrated substantial benefits in health care education. Students rated the tool as accessible, engaging, and pedagogically valuable [<xref ref-type="bibr" rid="ref39">39</xref>,<xref ref-type="bibr" rid="ref42">42</xref>,<xref ref-type="bibr" rid="ref46">46</xref>,<xref ref-type="bibr" rid="ref47">47</xref>], with advanced models such as ChatGPT-4 achieving high scenario completion rates and error-free performance [<xref ref-type="bibr" rid="ref33">33</xref>,<xref ref-type="bibr" rid="ref40">40</xref>]. The simulations yielded highly realistic clinical responses and plausible feedback [<xref ref-type="bibr" rid="ref33">33</xref>,<xref ref-type="bibr" rid="ref35">35</xref>], and emotionally expressive interactions were found to be appropriate and contextually accurate [<xref ref-type="bibr" rid="ref36">36</xref>,<xref ref-type="bibr" rid="ref47">47</xref>]. 
Further studies confirmed strong user experiences and authentic communication [<xref ref-type="bibr" rid="ref38">38</xref>], with improved outcomes in medical interview performance [<xref ref-type="bibr" rid="ref35">35</xref>,<xref ref-type="bibr" rid="ref45">45</xref>] and decision-making when feedback mechanisms were integrated [<xref ref-type="bibr" rid="ref37">37</xref>,<xref ref-type="bibr" rid="ref41">41</xref>]. The tool also earned high marks for intuitiveness and user comfort [<xref ref-type="bibr" rid="ref42">42</xref>], enhanced perceptions of authenticity and learning effectiveness over conventional approaches [<xref ref-type="bibr" rid="ref36">36</xref>,<xref ref-type="bibr" rid="ref47">47</xref>], and received positive feedback on self-efficacy and the learning environment [<xref ref-type="bibr" rid="ref44">44</xref>]. Further, 1 study reported that 84.4% of participants perceived an improved understanding of the subject matter [<xref ref-type="bibr" rid="ref43">43</xref>].</p></sec><sec id="s3-7-2"><title>Evaluation Designs and Assessment Strategies</title><p>A total of 6 studies used an experimental design incorporating a control group to assess the effects of GenAI-supported virtual patients [<xref ref-type="bibr" rid="ref35">35</xref>,<xref ref-type="bibr" rid="ref36">36</xref>,<xref ref-type="bibr" rid="ref41">41</xref>,<xref ref-type="bibr" rid="ref45">45</xref>-<xref ref-type="bibr" rid="ref47">47</xref>]. Within these 6 studies, 5 of them compared the GenAI virtual patient with traditional pedagogical methods or a non-AI control [<xref ref-type="bibr" rid="ref35">35</xref>,<xref ref-type="bibr" rid="ref36">36</xref>,<xref ref-type="bibr" rid="ref45">45</xref>-<xref ref-type="bibr" rid="ref47">47</xref>]. The remaining 1 study used GenAI virtual patients in both the intervention and control groups, but with variations in functionality (conversation-only vs conversation+feedback) [<xref ref-type="bibr" rid="ref41">41</xref>]. 
The other 9 studies did not include comparative analyses between GenAI and alternative conditions or conduct pre-post intervention comparisons [<xref ref-type="bibr" rid="ref33">33</xref>,<xref ref-type="bibr" rid="ref34">34</xref>,<xref ref-type="bibr" rid="ref37">37</xref>-<xref ref-type="bibr" rid="ref40">40</xref>,<xref ref-type="bibr" rid="ref42">42</xref>-<xref ref-type="bibr" rid="ref44">44</xref>]. None of the studies in this review used a longitudinal design. Regarding data collection approaches, 9 studies relied exclusively on quantitative data [<xref ref-type="bibr" rid="ref34">34</xref>,<xref ref-type="bibr" rid="ref35">35</xref>,<xref ref-type="bibr" rid="ref38">38</xref>-<xref ref-type="bibr" rid="ref42">42</xref>,<xref ref-type="bibr" rid="ref45">45</xref>,<xref ref-type="bibr" rid="ref47">47</xref>], while the remaining 5 studies incorporated both quantitative and qualitative methods [<xref ref-type="bibr" rid="ref36">36</xref>,<xref ref-type="bibr" rid="ref37">37</xref>,<xref ref-type="bibr" rid="ref43">43</xref>,<xref ref-type="bibr" rid="ref44">44</xref>,<xref ref-type="bibr" rid="ref46">46</xref>].</p></sec><sec id="s3-7-3"><title>Design Limitations</title><p>Despite their promise, the GenAI-supported virtual patients reviewed exhibit several inherent limitations. Critically, behavioral and emotional complexity remains underdeveloped. While some avatars [<xref ref-type="bibr" rid="ref42">42</xref>,<xref ref-type="bibr" rid="ref44">44</xref>,<xref ref-type="bibr" rid="ref46">46</xref>] incorporated basic movements or facial expressions, these were often simplistic and failed to fully replicate the nuanced nonverbal cues (eg, subtle pain indicators, authentic gaze patterns, and culturally specific gestures) essential for holistic clinical assessment and empathy training. 
For instance, Mool et al [<xref ref-type="bibr" rid="ref46">46</xref>] noted that their 3D avatar was largely static within the examination room environment, with no description of physical movement. Emotional responsiveness was largely superficial, relying on prescripted ranges or simplistic vocalizations [<xref ref-type="bibr" rid="ref38">38</xref>,<xref ref-type="bibr" rid="ref42">42</xref>], rather than dynamically adapting emotional states based on learner interaction or physiological parameters. Furthermore, adaptability beyond dialog was constrained; virtual patients struggled to simulate evolving physical symptoms (eg, changing breath sounds and deteriorating vital signs) or accurately respond to physical examination maneuvers performed by learners within simulations. Memory and longitudinal consistency across interactions were absent, preventing virtual patients from recalling prior conversations or learner actions to build continuity. Finally, the underlying GenAI models (predominantly ChatGPT variants) introduced inherent biases and factual inaccuracies in medical content, alongside potential cultural insensitivities, raising concerns about the reliability and safety of the clinical scenarios portrayed. These limitations collectively restrict the virtual patients&#x2019; ability to fully mirror the dynamism and unpredictability of real patient encounters.</p><p>In terms of limitations in research methodology, 13 of the 15 studies did not report the evaluation of the reliability or validity of their assessment instruments [<xref ref-type="bibr" rid="ref33">33</xref>-<xref ref-type="bibr" rid="ref43">43</xref>,<xref ref-type="bibr" rid="ref45">45</xref>,<xref ref-type="bibr" rid="ref46">46</xref>]. Further, 2 studies explicitly tested reliability [<xref ref-type="bibr" rid="ref44">44</xref>,<xref ref-type="bibr" rid="ref47">47</xref>]. 
Furthermore, 9 studies used designs without a control group and did not measure the same variables before and after the intervention [<xref ref-type="bibr" rid="ref33">33</xref>,<xref ref-type="bibr" rid="ref34">34</xref>,<xref ref-type="bibr" rid="ref36">36</xref>-<xref ref-type="bibr" rid="ref40">40</xref>,<xref ref-type="bibr" rid="ref42">42</xref>,<xref ref-type="bibr" rid="ref45">45</xref>,<xref ref-type="bibr" rid="ref47">47</xref>], making it unclear whether the reported outcomes resulted from the intervention itself or differed across other conditions.</p></sec></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Principal Findings</title><p>The current systematic review provides a comprehensive synthesis of the emerging evidence on GenAI-supported virtual patients in health care education, focusing on implementation characteristics, educational impact, and methodological considerations. Building on the descriptive findings presented in the Results section, the following sections offer analytical comparisons across study designs, AI modalities, and educational purposes, and map identified limitations to established learning theories to guide future development.</p></sec><sec id="s4-2"><title>Synthesis of Findings by Study Design, AI Modality, and Educational Purpose</title><p>When examining the evidence base by study design, a clear pattern emerges regarding the strength of causal inferences that can be drawn. The 3 RCTs provide the most robust evidence, demonstrating significant improvements in clinical decision-making [<xref ref-type="bibr" rid="ref41">41</xref>], ophthalmology history-taking skills [<xref ref-type="bibr" rid="ref45">45</xref>], and medical history-taking performance [<xref ref-type="bibr" rid="ref47">47</xref>] attributable to GenAI-supported virtual patient interventions. 
In contrast, the cross-sectional and quasi-experimental studies, while consistently reporting positive user perceptions and self-reported skill gains [<xref ref-type="bibr" rid="ref33">33</xref>,<xref ref-type="bibr" rid="ref34">34</xref>,<xref ref-type="bibr" rid="ref38">38</xref>,<xref ref-type="bibr" rid="ref39">39</xref>,<xref ref-type="bibr" rid="ref42">42</xref>,<xref ref-type="bibr" rid="ref46">46</xref>], are more susceptible to enthusiasm bias and cannot establish definitive causal links between the intervention and learning outcomes. This methodological gradient underscores the need for more rigorous, controlled trials to move the field beyond proof-of-concept toward evidence-based practice.</p><p>Critically, the overall evidence base is characterized by small sample sizes, brief intervention durations (often single sessions under 10 minutes), and a near-complete absence of longitudinal follow-up. Consequently, while the studies demonstrate that GenAI-supported virtual patients are feasible and acceptable to learners, claims regarding their educational effectiveness must remain preliminary. The current findings support feasibility and acceptability more strongly than demonstrable learning gains or transfer to clinical practice.</p><p>Analysis by AI modality reveals differential alignment with educational objectives. Studies using embodied, voice-based virtual patients [<xref ref-type="bibr" rid="ref36">36</xref>,<xref ref-type="bibr" rid="ref38">38</xref>,<xref ref-type="bibr" rid="ref44">44</xref>-<xref ref-type="bibr" rid="ref46">46</xref>] predominantly targeted communication skills and emotional realism as primary outcomes. 
For example, Borg et al [<xref ref-type="bibr" rid="ref36">36</xref>] found that a social robotic platform with 3D embodiment was rated significantly higher for authenticity and emotional skill training compared to a conventional computer-based platform, suggesting that physical presence and nonverbal cues may be particularly valuable for interpersonal skill development. In contrast, text-based virtual patient studies [<xref ref-type="bibr" rid="ref33">33</xref>-<xref ref-type="bibr" rid="ref35">35</xref>,<xref ref-type="bibr" rid="ref37">37</xref>,<xref ref-type="bibr" rid="ref39">39</xref>-<xref ref-type="bibr" rid="ref41">41</xref>,<xref ref-type="bibr" rid="ref43">43</xref>,<xref ref-type="bibr" rid="ref47">47</xref>] more frequently targeted clinical reasoning, diagnostic accuracy, and history-taking performance. Yu et al [<xref ref-type="bibr" rid="ref47">47</xref>] directly compared their text-based AI patient to human-simulated patients and found comparable or superior support for clinical reasoning, indicating that for cognitive skills such as information gathering and diagnostic thinking, sophisticated text-based interactions may be as effective as, or more effective than, human simulations. This modality-outcome alignment has practical implications for instructional design: educators should select or design GenAI virtual patient platforms based on the specific competencies they aim to develop.</p><p>When considering educational purpose, studies targeting communication skills [<xref ref-type="bibr" rid="ref38">38</xref>,<xref ref-type="bibr" rid="ref40">40</xref>,<xref ref-type="bibr" rid="ref44">44</xref>,<xref ref-type="bibr" rid="ref46">46</xref>] consistently reported improvements in learner empathy, interviewing technique, and patient interaction quality. 
For instance, Maquil&#x00F3;n et al [<xref ref-type="bibr" rid="ref38">38</xref>] found that medical first responders rated the virtual patient&#x2019;s voice interactions as natural for assessing physiological state, while Mool et al [<xref ref-type="bibr" rid="ref46">46</xref>] observed that some students anthropomorphized the avatar, using the patient&#x2019;s name and inferring attitudes&#x2014;behaviors indicative of authentic communication practice. Studies focused on clinical reasoning [<xref ref-type="bibr" rid="ref36">36</xref>,<xref ref-type="bibr" rid="ref41">41</xref>,<xref ref-type="bibr" rid="ref47">47</xref>] demonstrated gains in diagnostic accuracy, information gathering, and decision-making, with Br&#x00FC;gge et al [<xref ref-type="bibr" rid="ref41">41</xref>] showing that feedback-enhanced interactions led to significantly higher clinical reasoning scores. This pattern suggests that GenAI-supported virtual patients can be effectively tailored to specific educational objectives, with modality and design features aligning with targeted learning outcomes.</p></sec><sec id="s4-3"><title>Theoretical Implications of Design Limitations</title><p>From the perspective of experiential learning theory [<xref ref-type="bibr" rid="ref48">48</xref>], the lack of longitudinal consistency and evolving patient states prevents learners from engaging in the full cycle of concrete experience, reflective observation, abstract conceptualization, and active experimentation across repeated encounters. Without memory across sessions, students cannot build upon prior interactions or observe the consequences of their clinical decisions over time&#x2014;a core component of developing clinical expertise. 
This limitation suggests that future GenAI virtual patient designs should incorporate persistent patient states and longitudinal case progression to support complete experiential learning cycles.</p><p>Cognitive load theory [<xref ref-type="bibr" rid="ref49">49</xref>,<xref ref-type="bibr" rid="ref50">50</xref>] provides another lens for interpreting current limitations. The technical glitches, inconsistent responses, and need to troubleshoot AI interactions observed in several studies [<xref ref-type="bibr" rid="ref38">38</xref>,<xref ref-type="bibr" rid="ref43">43</xref>,<xref ref-type="bibr" rid="ref46">46</xref>] impose extraneous cognitive load, diverting mental resources away from the germane load essential for schema construction and clinical reasoning development. Reducing technical unpredictability through more robust prompt engineering and model selection, as demonstrated by Yu et al [<xref ref-type="bibr" rid="ref47">47</xref>], could minimize extraneous load and optimize cognitive resources for learning.</p><p>Furthermore, the absence of adaptive emotional and behavioral complexity limits opportunities for the sustained, feedback-driven practice necessary to refine complex interpersonal skills. Well-established principles of skill acquisition emphasize the importance of repeated engagement with authentic tasks, immediate feedback, and progressive challenge&#x2014;elements that current GenAI virtual patients only partially provide. Incorporating dynamically adjusting emotional states based on learner interactions, informed by frameworks such as the Big Five personality model used by Yu et al [<xref ref-type="bibr" rid="ref47">47</xref>], could better support the deliberate practice of communication and empathy.</p></sec><sec id="s4-4"><title>Implications in Practice and Research</title><p>The implementation of GenAI-supported virtual patients in health care education carries significant practical implications. 
Educators and curriculum designers must carefully consider the technological infrastructure required to support diverse interaction modalities&#x2014;text, voice, and hybrid formats&#x2014;to cater to varied learning preferences and replicate realistic clinical scenarios. The integration of dynamic, AI-generated responses demands ongoing technical oversight and periodic updates to ensure the content remains accurate and relevant. Furthermore, the relatively brief interaction sessions observed in this review suggest that more extended, immersive simulations are needed to mirror the complexity of real-world clinical practice. This calls for interdisciplinary collaborations between educators, clinicians, and technology developers to ensure that practical implementations not only leverage the technical advantages of GenAI but also align with educational objectives and clinical standards.</p><p>The finding that only 3 of the 15 included studies were explicitly grounded in established educational frameworks highlights an important area for future research. The limited adoption of frameworks indicates that many current approaches are primarily driven by technological innovation rather than pedagogical insight. By integrating robust theoretical perspectives, the design of GenAI-supported virtual patients can be enhanced to facilitate richer, more targeted learning experiences that promote the development of clinical reasoning and decision-making skills. Furthermore, theoretical models can help in formulating clear hypotheses about how these advanced simulations influence learner outcomes, guiding both the design of interventions and the evaluation of their effectiveness [<xref ref-type="bibr" rid="ref51">51</xref>]. 
Encouraging further interdisciplinary studies that explicitly link technology-driven interventions with established learning theories will be essential in building a more comprehensive understanding of how these innovations can transform health care education.</p><p>For research design, only 5 of the 15 studies incorporated experimental designs with control groups, allowing for direct comparisons between traditional simulation methods and novel AI-driven interventions [<xref ref-type="bibr" rid="ref35">35</xref>,<xref ref-type="bibr" rid="ref36">36</xref>,<xref ref-type="bibr" rid="ref41">41</xref>,<xref ref-type="bibr" rid="ref45">45</xref>,<xref ref-type="bibr" rid="ref47">47</xref>]. This experimental rigor has been crucial in establishing a baseline understanding of the potential advantages of GenAI-enhanced simulations, especially in relation to improving communication skills and clinical reasoning. The remaining 10 studies typically used within-group designs with mixed methods or solely quantitative approaches, providing insights into user perceptions and immediate learning outcomes [<xref ref-type="bibr" rid="ref33">33</xref>,<xref ref-type="bibr" rid="ref34">34</xref>,<xref ref-type="bibr" rid="ref37">37</xref>-<xref ref-type="bibr" rid="ref40">40</xref>,<xref ref-type="bibr" rid="ref42">42</xref>-<xref ref-type="bibr" rid="ref44">44</xref>,<xref ref-type="bibr" rid="ref46">46</xref>]. As highlighted in the synthesis above, the current evidence base supports feasibility and acceptability more strongly than conclusive educational effectiveness. Future research must therefore prioritize larger, rigorous trials with long-term follow-up and objective measures of skill transfer to clinical practice.</p><p>To provide a more balanced perspective, the potential benefits of GenAI-supported virtual patients must be considered alongside their well-documented risks and ethical challenges. 
Our findings regarding positive learner perceptions and skill development exist within a context of significant technological limitations. Key concerns include algorithmic bias inherent in the training data of LLMs, which could reinforce stereotypes or lead to culturally insensitive patient portrayals, thereby misinforming clinical empathy and communication [<xref ref-type="bibr" rid="ref52">52</xref>,<xref ref-type="bibr" rid="ref53">53</xref>]. Data privacy and security present another critical challenge, as sensitive health information used in prompt engineering or generated during simulated dialogs requires robust safeguards to comply with regulations such as HIPAA (Health Insurance Portability and Accountability Act) or GDPR (General Data Protection Regulation) [<xref ref-type="bibr" rid="ref54">54</xref>,<xref ref-type="bibr" rid="ref55">55</xref>]. Most critically, the tendency of GenAI models to generate plausible but incorrect or fabricated information&#x2014;known as &#x201C;hallucinations&#x201D;&#x2014;poses a direct threat to clinical accuracy [<xref ref-type="bibr" rid="ref56">56</xref>]. In a health care education setting, an AI virtual patient providing factually wrong symptoms, pathophysiology, or treatment responses could seriously compromise foundational medical knowledge and patient safety. Therefore, the implementation of these tools necessitates rigorous validation frameworks, ongoing human oversight, and clear institutional guidelines to mitigate these risks, ensuring that innovation in simulation does not come at the cost of pedagogical integrity or ethical responsibility.</p></sec><sec id="s4-5"><title>Integration of Methodological Quality</title><p>The risk of bias assessment conducted for this review underscores a critical consideration when interpreting its findings. 
While 3 RCTs demonstrated relatively strong methodological rigor (low to low-moderate risk) [<xref ref-type="bibr" rid="ref57">57</xref>], most of the evidence derives from quasi-experimental and cross-sectional studies with moderate to high risk of bias [<xref ref-type="bibr" rid="ref58">58</xref>]. This methodological landscape indicates that the current evidence base is still in a formative, proof-of-concept stage [<xref ref-type="bibr" rid="ref59">59</xref>]. The prevalent limitations&#x2014;such as small sample sizes, absence of control groups, lack of blinding, and reliance on unvalidated or self-reported outcomes&#x2014;suggest that the positive results regarding user acceptance, perceived learning, and skill improvement should be viewed as promising preliminary signals rather than conclusive evidence of efficacy [<xref ref-type="bibr" rid="ref60">60</xref>-<xref ref-type="bibr" rid="ref63">63</xref>]. These design weaknesses increase the risk of overestimating positive effects due to confounding, measurement bias, or participant enthusiasm for novel technology [<xref ref-type="bibr" rid="ref64">64</xref>,<xref ref-type="bibr" rid="ref65">65</xref>]. Therefore, the synthesized findings, particularly those related to educational impact, must be interpreted with appropriate caution. 
This appraisal directly informs the primary recommendation of this review: future research must prioritize methodological robustness, including larger-scale randomized designs with active control groups [<xref ref-type="bibr" rid="ref66">66</xref>], longitudinal follow-up [<xref ref-type="bibr" rid="ref67">67</xref>], and the use of validated, objective outcome measures [<xref ref-type="bibr" rid="ref65">65</xref>] to establish a more definitive evidence base for the educational effectiveness of GenAI-supported virtual patients.</p><p>Furthermore, our analysis identified variable reporting of ethical considerations&#x2014;such as institutional review board approval and data security measures&#x2014;across the included studies. While systematic reviews themselves do not require ethical approval, transparency in primary research is a cornerstone of methodological rigor and trustworthiness [<xref ref-type="bibr" rid="ref68">68</xref>]. Moving forward, consistent and explicit ethical reporting should be considered a standard in this domain, especially when research involves simulated patient interactions and learner data, to ensure the credibility and safe translation of findings into educational practice [<xref ref-type="bibr" rid="ref53">53</xref>,<xref ref-type="bibr" rid="ref54">54</xref>].</p></sec><sec id="s4-6"><title>Limitations</title><p>This review has several limitations that should be addressed when interpreting its findings. First, while our systematic search was comprehensive across several databases, the inclusion criteria limited the review to English-language studies, potentially excluding relevant research published in other languages or in alternative repositories. Second, many of the primary studies included in this review are constrained by methodological limitations, such as small sample sizes, short simulation durations, and an overreliance on self-reported outcomes, which restrict the generalizability of the findings. 
Third, the heterogeneity in study designs and assessment tools across the interventions complicates direct comparisons and synthesis of outcomes. Furthermore, our search was limited to published, peer-reviewed literature and did not include gray literature sources such as clinical trial registries or preprint servers. While this decision aligns with standard systematic review practices, it is particularly consequential in a rapidly evolving field where early innovations often first appear as preprints or technical reports. Consequently, the review may not capture the most recent developments or emerging design approaches that have not yet undergone formal peer review. Future updates to this review should consider expanding the search to include gray literature as the evidence base matures. Lastly, the current review only included 15 papers; this limited number of studies may not capture the full spectrum of innovative practices and outcomes in this rapidly evolving field, thereby constraining the robustness of the conclusions drawn.</p></sec><sec id="s4-7"><title>Conclusions</title><p>In summary, this review confirms that GenAI-supported virtual patients offer notable advances in adaptability and interactivity. This study is innovative as it constitutes the first dedicated synthesis of this specific technological application in health care education. It differs from prior reviews of virtual patients or GenAI by focusing exclusively on their intersection, thereby isolating the unique capabilities and challenges introduced by GenAI. The review brings to the field a foundational framework that classifies key design dimensions, evaluates educational impact, and identifies critical gaps, setting a clear agenda for subsequent research. 
The real-world implications are significant: for educators and technologists, it provides an evidence-based roadmap for developing more effective, theory-informed simulations; for institutions, it highlights the practical considerations and potential transformative value of integrating these tools to modernize clinical skills training and address scalability in health care education.</p></sec></sec></body><back><ack><p>This study would not have been possible without the support of the Hong Kong Metropolitan University. The authors declare the use of generative artificial intelligence (GenAI) in the research and writing process. According to the GAIDeT (Generative Artificial Intelligence Delegation Taxonomy; 2025), the following tasks were delegated to GenAI tools under full human supervision: proofreading and editing. The GenAI tool used was Grammarly (Superhuman Platform). Responsibility for this final paper lies entirely with the authors. GenAI tools are not listed as authors and do not bear responsibility for the outcomes.</p></ack><notes><sec><title>Funding</title><p>No funding was received for conducting this study.</p></sec><sec><title>Data Availability</title><p>Data is presented in <xref ref-type="table" rid="table2">Tables 2</xref> and <xref ref-type="table" rid="table3">3</xref>.</p></sec></notes><fn-group><fn fn-type="con"><p>Conceptualization: JJ, TTOK, JYHW</p><p>Formal analysis: MZY</p><p>Investigation: MZY</p><p>Methodology: JJ</p><p>Supervision: JYHW</p><p>Writing &#x2013; original draft: JJ</p><p>Writing &#x2013; review &#x0026; editing: JJ, MZY, TTOK, JYHW</p><p>JJ and MZY are the cofirst authors.</p></fn><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">AI</term><def><p>artificial intelligence</p></def></def-item><def-item><term id="abb2">GDPR</term><def><p>General Data Protection Regulation</p></def></def-item><def-item><term 
id="abb3">GenAI</term><def><p>generative artificial intelligence</p></def></def-item><def-item><term id="abb4">GRADE</term><def><p>Grading of Recommendations, Assessment, Development, and Evaluation</p></def></def-item><def-item><term id="abb5">HIPAA</term><def><p>Health Insurance Portability and Accountability Act</p></def></def-item><def-item><term id="abb6">JBI</term><def><p>Joanna Briggs Institute</p></def></def-item><def-item><term id="abb7">LLM</term><def><p>large language model</p></def></def-item><def-item><term id="abb8">MeSH</term><def><p>Medical Subject Headings</p></def></def-item><def-item><term id="abb9">MIMIC-III</term><def><p>Medical Information Mart for Intensive Care III</p></def></def-item><def-item><term id="abb10">PRISMA</term><def><p>Preferred Reporting Items for Systematic Reviews and Meta-Analyses</p></def></def-item><def-item><term id="abb11">PRISMA-S</term><def><p>Preferred Reporting Items for Systematic Reviews and Meta-Analyses Literature Search Extension</p></def></def-item><def-item><term id="abb12">RCT</term><def><p>randomized controlled trial</p></def></def-item><def-item><term id="abb13">RQ</term><def><p>research question</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kononowicz</surname><given-names>AA</given-names> </name><name name-style="western"><surname>Woodham</surname><given-names>LA</given-names> </name><name name-style="western"><surname>Edelbring</surname><given-names>S</given-names> </name><etal/></person-group><article-title>Virtual patient simulations in health professions education: systematic review and meta-analysis by the digital health education collaboration</article-title><source>J Med Internet Res</source><year>2019</year><month>07</month><day>2</day><volume>21</volume><issue>7</issue><fpage>e14676</fpage><pub-id 
pub-id-type="doi">10.2196/14676</pub-id><pub-id pub-id-type="medline">31267981</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Cook</surname><given-names>DA</given-names> </name><name name-style="western"><surname>Erwin</surname><given-names>PJ</given-names> </name><name name-style="western"><surname>Triola</surname><given-names>MM</given-names> </name></person-group><article-title>Computerized virtual patients in health professions education: a systematic review and meta-analysis</article-title><source>Acad Med</source><year>2010</year><month>10</month><volume>85</volume><issue>10</issue><fpage>1589</fpage><lpage>1602</lpage><pub-id pub-id-type="doi">10.1097/ACM.0b013e3181edfe13</pub-id><pub-id pub-id-type="medline">20703150</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="report"><person-group person-group-type="author"><name name-style="western"><surname>Lioce</surname><given-names>L</given-names> </name><name name-style="western"><surname>Lopreiato</surname><given-names>J</given-names> </name><name name-style="western"><surname>Anderson</surname><given-names>M</given-names></name><etal/></person-group><article-title>Healthcare simulation dictionary&#x2013;third edition</article-title><year>2024</year><access-date>2026-04-21</access-date><publisher-name>Agency for Healthcare Research and Quality</publisher-name><comment><ext-link ext-link-type="uri" xlink:href="https://www.ssih.org/sites/default/files/2025-03/Healthcare-Simulation-Dictionary-3.pdf">https://www.ssih.org/sites/default/files/2025-03/Healthcare-Simulation-Dictionary-3.pdf</ext-link></comment></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>D&#x00E1;vidovics</surname><given-names>A</given-names> </name><name 
name-style="western"><surname>D&#x00E1;vidovics</surname><given-names>K</given-names> </name><name name-style="western"><surname>Hillebrand</surname><given-names>P</given-names> </name><name name-style="western"><surname>Rendeki</surname><given-names>S</given-names> </name><name name-style="western"><surname>N&#x00E9;meth</surname><given-names>T</given-names> </name></person-group><article-title>Virtual patient simulation to enhance medical students&#x2019; clinical communication and decision-making skills: a pilot study</article-title><source>BMC Med Educ</source><year>2026</year><volume>26</volume><issue>1</issue><fpage>171</fpage><pub-id pub-id-type="doi">10.1186/s12909-025-08507-7</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Furey</surname><given-names>P</given-names> </name><name name-style="western"><surname>Town</surname><given-names>A</given-names> </name><name name-style="western"><surname>Sumera</surname><given-names>K</given-names> </name><name name-style="western"><surname>Webster</surname><given-names>CA</given-names> </name></person-group><article-title>Approaches for integrating generative artificial intelligence in emergency healthcare education within higher education: a scoping review</article-title><source>Crit Care Innov</source><year>2024</year><access-date>2026-05-04</access-date><volume>7</volume><issue>2</issue><fpage>34</fpage><lpage>54</lpage><comment><ext-link ext-link-type="uri" xlink:href="https://irep.ntu.ac.uk/id/eprint/51748/1/1913244_Webster.pdf">https://irep.ntu.ac.uk/id/eprint/51748/1/1913244_Webster.pdf</ext-link></comment></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Rodriguez</surname><given-names>DV</given-names> </name><name 
name-style="western"><surname>Lawrence</surname><given-names>K</given-names> </name><name name-style="western"><surname>Gonzalez</surname><given-names>J</given-names> </name><etal/></person-group><article-title>Leveraging generative AI tools to support the development of digital solutions in health care research: case study</article-title><source>JMIR Hum Factors</source><year>2023</year><volume>11</volume><fpage>e52885</fpage><pub-id pub-id-type="doi">10.2196/52885</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Potter</surname><given-names>L</given-names> </name><name name-style="western"><surname>Jefferies</surname><given-names>C</given-names> </name></person-group><article-title>Enhancing communication and clinical reasoning in medical education: building virtual patients with generative AI</article-title><source>Future Healthcare J</source><year>2024</year><month>04</month><volume>11</volume><fpage>100043</fpage><pub-id pub-id-type="doi">10.1016/j.fhj.2024.100043</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Yaqoob</surname><given-names>A</given-names> </name><name name-style="western"><surname>Verma</surname><given-names>NK</given-names> </name><name name-style="western"><surname>Aziz</surname><given-names>RM</given-names> </name></person-group><article-title>Optimizing gene selection and cancer classification with hybrid sine cosine and cuckoo search algorithm</article-title><source>J Med Syst</source><year>2024</year><month>01</month><day>9</day><volume>48</volume><issue>1</issue><fpage>10</fpage><pub-id pub-id-type="doi">10.1007/s10916-023-02031-1</pub-id><pub-id pub-id-type="medline">38193948</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group 
person-group-type="author"><name name-style="western"><surname>Talbot</surname><given-names>TB</given-names> </name><name name-style="western"><surname>Sagae</surname><given-names>K</given-names> </name><name name-style="western"><surname>John</surname><given-names>B</given-names> </name><name name-style="western"><surname>Rizzo</surname><given-names>AA</given-names> </name></person-group><article-title>Designing useful virtual standardized patient encounters</article-title><source>I/ITSEC Proceedings</source><year>2012</year><access-date>2026-04-25</access-date><fpage>1</fpage><lpage>11</lpage><comment><ext-link ext-link-type="uri" xlink:href="http://deadnet.se:8080/ict.usc.edu/pubs/Designing%20Useful%20Virtual%20Standardized%20Patient%20Encounters.pdf">http://deadnet.se:8080/ict.usc.edu/pubs/Designing%20Useful%20Virtual%20Standardized%20Patient%20Encounters.pdf</ext-link></comment></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Reed</surname><given-names>T</given-names> </name><name name-style="western"><surname>Pirotte</surname><given-names>M</given-names> </name><name name-style="western"><surname>McHugh</surname><given-names>M</given-names> </name><etal/></person-group><article-title>Simulation-based mastery learning improves medical student performance and retention of core clinical skills</article-title><source>Sim Healthcare</source><year>2016</year><volume>11</volume><issue>3</issue><fpage>173</fpage><lpage>180</lpage><pub-id pub-id-type="doi">10.1097/SIH.0000000000000154</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Elendu</surname><given-names>C</given-names> </name><name name-style="western"><surname>Amaechi</surname><given-names>DC</given-names> </name><name 
name-style="western"><surname>Okatta</surname><given-names>AU</given-names> </name><etal/></person-group><article-title>The impact of simulation-based training in medical education: a review</article-title><source>Medicine (Baltimore)</source><year>2024</year><month>07</month><day>5</day><volume>103</volume><issue>27</issue><fpage>e38813</fpage><pub-id pub-id-type="doi">10.1097/MD.0000000000038813</pub-id><pub-id pub-id-type="medline">38968472</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Padilha</surname><given-names>JM</given-names> </name><name name-style="western"><surname>Machado</surname><given-names>PP</given-names> </name><name name-style="western"><surname>Ribeiro</surname><given-names>A</given-names> </name><name name-style="western"><surname>Ramos</surname><given-names>J</given-names> </name><name name-style="western"><surname>Costa</surname><given-names>P</given-names> </name></person-group><article-title>Clinical virtual simulation in nursing education: randomized controlled trial</article-title><source>J Med Internet Res</source><year>2019</year><month>03</month><day>18</day><volume>21</volume><issue>3</issue><fpage>e11529</fpage><pub-id pub-id-type="doi">10.2196/11529</pub-id><pub-id pub-id-type="medline">30882355</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kubin</surname><given-names>L</given-names> </name><name name-style="western"><surname>Fogg</surname><given-names>N</given-names> </name><name name-style="western"><surname>Trinka</surname><given-names>M</given-names> </name></person-group><article-title>Alternative clinical learning experiences for nursing education using virtual individual patients</article-title><source>Nurs Educ 
Perspect</source><year>2023</year><volume>44</volume><issue>4</issue><fpage>259</fpage><lpage>260</lpage><pub-id pub-id-type="doi">10.1097/01.NEP.0000000000001066</pub-id><pub-id pub-id-type="medline">36240018</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Williams</surname><given-names>R</given-names> </name><name name-style="western"><surname>Helmer</surname><given-names>B</given-names> </name><name name-style="western"><surname>Elliott</surname><given-names>A</given-names> </name><name name-style="western"><surname>Robinson</surname><given-names>D</given-names> </name><name name-style="western"><surname>Jimenez</surname><given-names>FA</given-names> </name><name name-style="western"><surname>Faragher</surname><given-names>ME</given-names> </name></person-group><article-title>Navigating the virtual frontier: a virtual patient simulation pilot study in prelicensure baccalaureate nursing education</article-title><source>Clin Simul Nurs</source><year>2024</year><month>09</month><volume>94</volume><fpage>101589</fpage><pub-id pub-id-type="doi">10.1016/j.ecns.2024.101589</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hamilton</surname><given-names>A</given-names> </name><name name-style="western"><surname>Molzahn</surname><given-names>A</given-names> </name><name name-style="western"><surname>McLemore</surname><given-names>K</given-names> </name></person-group><article-title>The evolution from standardized to virtual patients in medical education</article-title><source>Cureus</source><year>2024</year><month>10</month><day>10</day><volume>16</volume><issue>10</issue><fpage>e71224</fpage><pub-id pub-id-type="doi">10.7759/cureus.71224</pub-id><pub-id pub-id-type="medline">39525234</pub-id></nlm-citation></ref><ref 
id="ref16"><label>16</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>He</surname><given-names>H</given-names> </name><name name-style="western"><surname>Xu</surname><given-names>X</given-names> </name><name name-style="western"><surname>Li</surname><given-names>S</given-names> </name><name name-style="western"><surname>Bueno-Vesga</surname><given-names>JA</given-names> </name><name name-style="western"><surname>Duan</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Gu</surname><given-names>Y</given-names> </name></person-group><article-title>Training nursing skills in a generative artificial intelligence-enhanced virtual reality patient encounter simulation: a qualitative study from a student perspective</article-title><source>Int J Educ Technol High Educ</source><year>2026</year><volume>23</volume><issue>1</issue><fpage>12</fpage><pub-id pub-id-type="doi">10.1186/s41239-026-00587-9</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>McCoy</surname><given-names>L</given-names> </name><name name-style="western"><surname>Pettit</surname><given-names>RK</given-names> </name><name name-style="western"><surname>Lewis</surname><given-names>JH</given-names> </name><etal/></person-group><article-title>Developing technology-enhanced active learning for medical education: challenges, solutions, and future directions</article-title><source>Acad Med</source><year>2015</year><month>04</month><day>1</day><volume>115</volume><issue>4</issue><fpage>202</fpage><lpage>211</lpage><pub-id pub-id-type="doi">10.7556/jaoa.2015.042</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Andrade</surname><given-names>R</given-names> 
</name></person-group><article-title>An exploration of virtual standardized patients and their effect on clinical readiness in pharmacy education</article-title><source>Curr Pharm Teach Learn</source><year>2026</year><month>05</month><volume>18</volume><issue>5</issue><fpage>102599</fpage><pub-id pub-id-type="doi">10.1016/j.cptl.2026.102599</pub-id><pub-id pub-id-type="medline">41713008</pub-id></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sengar</surname><given-names>SS</given-names> </name><name name-style="western"><surname>Hasan</surname><given-names>AB</given-names> </name><name name-style="western"><surname>Kumar</surname><given-names>S</given-names> </name><name name-style="western"><surname>Carroll</surname><given-names>F</given-names> </name></person-group><article-title>Generative artificial intelligence: a systematic review and applications</article-title><source>Multimed Tools Appl</source><year>2025</year><volume>84</volume><issue>21</issue><fpage>23661</fpage><lpage>23700</lpage><pub-id pub-id-type="doi">10.1007/s11042-024-20016-1</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wang</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>L</given-names> </name><name name-style="western"><surname>Siau</surname><given-names>KL</given-names> </name></person-group><article-title>Human-centered interaction in virtual worlds: a new era of generative artificial intelligence and metaverse</article-title><source>Int J Hum-Comput Interact</source><year>2025</year><month>01</month><day>17</day><volume>41</volume><issue>2</issue><fpage>1459</fpage><lpage>1501</lpage><pub-id pub-id-type="doi">10.1080/10447318.2024.2316376</pub-id><pub-id 
pub-id-type="medline">38784821</pub-id></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Garc&#x00ED;a-Torres</surname><given-names>D</given-names> </name><name name-style="western"><surname>Vicente Ripoll</surname><given-names>MA</given-names> </name><name name-style="western"><surname>Fern&#x00E1;ndez Peris</surname><given-names>C</given-names> </name><name name-style="western"><surname>Mira Solves</surname><given-names>JJ</given-names> </name></person-group><article-title>Enhancing clinical reasoning with virtual patients: a hybrid systematic review combining human reviewers and ChatGPT</article-title><source>Healthcare (Basel)</source><year>2024</year><volume>12</volume><issue>22</issue><fpage>2241</fpage><pub-id pub-id-type="doi">10.3390/healthcare12222241</pub-id></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Jay</surname><given-names>R</given-names> </name><name name-style="western"><surname>Sandars</surname><given-names>J</given-names> </name><name name-style="western"><surname>Patel</surname><given-names>R</given-names> </name><etal/></person-group><article-title>The use of virtual patients to provide feedback on clinical reasoning: a systematic review</article-title><source>Acad Med</source><year>2025</year><month>02</month><day>1</day><volume>100</volume><issue>2</issue><fpage>229</fpage><lpage>238</lpage><pub-id pub-id-type="doi">10.1097/ACM.0000000000005908</pub-id><pub-id pub-id-type="medline">39485118</pub-id></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Neo</surname><given-names>NWS</given-names> </name><name name-style="western"><surname>Gunawan</surname><given-names>J</given-names> 
</name><name name-style="western"><surname>Levett-Jones</surname><given-names>T</given-names> </name><name name-style="western"><surname>Khoo</surname><given-names>ET</given-names> </name><name name-style="western"><surname>Chua</surname><given-names>WL</given-names> </name><name name-style="western"><surname>Liaw</surname><given-names>SY</given-names> </name></person-group><article-title>Generative artificial intelligence in healthcare simulation-based education: a scoping review</article-title><source>Clin Simul Nurs</source><year>2025</year><month>11</month><volume>108</volume><fpage>101819</fpage><pub-id pub-id-type="doi">10.1016/j.ecns.2025.101819</pub-id></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Janssen</surname><given-names>E</given-names> </name><name name-style="western"><surname>McLagan</surname><given-names>R</given-names> </name><name name-style="western"><surname>Habeck</surname><given-names>J</given-names> </name><name name-style="western"><surname>Chung</surname><given-names>SY</given-names> </name><name name-style="western"><surname>McArthur</surname><given-names>EC</given-names> </name><name name-style="western"><surname>Anderson</surname><given-names>P</given-names> </name></person-group><article-title>Barriers to breakthroughs: a scoping review of generative AI in healthcare simulation</article-title><source>Clin Simul Nurs</source><year>2025</year><month>10</month><volume>107</volume><fpage>101791</fpage><pub-id pub-id-type="doi">10.1016/j.ecns.2025.101791</pub-id><pub-id pub-id-type="medline">40339523</pub-id></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Page</surname><given-names>MJ</given-names> </name><name name-style="western"><surname>McKenzie</surname><given-names>JE</given-names> </name><name 
name-style="western"><surname>Bossuyt</surname><given-names>PM</given-names> </name><etal/></person-group><article-title>The PRISMA 2020 statement: an updated guideline for reporting systematic reviews</article-title><source>BMJ</source><year>2021</year><month>03</month><day>29</day><volume>372</volume><fpage>n71</fpage><pub-id pub-id-type="doi">10.1136/bmj.n71</pub-id><pub-id pub-id-type="medline">33782057</pub-id></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Campbell</surname><given-names>M</given-names> </name><name name-style="western"><surname>McKenzie</surname><given-names>JE</given-names> </name><name name-style="western"><surname>Sowden</surname><given-names>A</given-names> </name><etal/></person-group><article-title>Synthesis without meta-analysis (SWiM) in systematic reviews: reporting guideline</article-title><source>BMJ</source><year>2020</year><month>01</month><day>16</day><volume>368</volume><fpage>l6890</fpage><pub-id pub-id-type="doi">10.1136/bmj.l6890</pub-id><pub-id pub-id-type="medline">31948937</pub-id></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="web"><article-title>Systematic review of GenAI supported virtual patient in healthcare education</article-title><source>OSF</source><access-date>2026-04-21</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://osf.io/uxcwr/overview?view_only=9c97eb9c2df64afc97719b314a0af93f">https://osf.io/uxcwr/overview?view_only=9c97eb9c2df64afc97719b314a0af93f</ext-link></comment></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Rethlefsen</surname><given-names>ML</given-names> </name><name name-style="western"><surname>Kirtley</surname><given-names>S</given-names> </name><name 
name-style="western"><surname>Waffenschmidt</surname><given-names>S</given-names> </name><etal/></person-group><article-title>PRISMA-S: an extension to the PRISMA Statement for Reporting Literature Searches in Systematic Reviews</article-title><source>Syst Rev</source><year>2021</year><month>01</month><day>26</day><volume>10</volume><issue>1</issue><fpage>39</fpage><pub-id pub-id-type="doi">10.1186/s13643-020-01542-z</pub-id><pub-id pub-id-type="medline">33499930</pub-id></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Landis</surname><given-names>JR</given-names> </name><name name-style="western"><surname>Koch</surname><given-names>GG</given-names> </name></person-group><article-title>An application of hierarchical kappa-type statistics in the assessment of majority agreement among multiple observers</article-title><source>Biometrics</source><year>1977</year><month>06</month><volume>33</volume><issue>2</issue><fpage>363</fpage><lpage>374</lpage><pub-id pub-id-type="doi">10.2307/2529786</pub-id><pub-id pub-id-type="medline">884196</pub-id></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="web"><article-title>JBI critical appraisal tools</article-title><source>JBI</source><access-date>2026-04-21</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://jbi.global/critical-appraisal-tools">https://jbi.global/critical-appraisal-tools</ext-link></comment></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Barker</surname><given-names>TH</given-names> </name><name name-style="western"><surname>Stone</surname><given-names>JC</given-names> </name><name name-style="western"><surname>Sears</surname><given-names>K</given-names> </name><etal/></person-group><article-title>The revised JBI critical 
appraisal tool for the assessment of risk of bias for randomized controlled trials</article-title><source>JBI Evidence Synthesis</source><year>2023</year><volume>21</volume><issue>3</issue><fpage>494</fpage><lpage>506</lpage><pub-id pub-id-type="doi">10.11124/JBIES-22-00430</pub-id></nlm-citation></ref><ref id="ref32"><label>32</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Barker</surname><given-names>TH</given-names> </name><name name-style="western"><surname>Habibi</surname><given-names>N</given-names> </name><name name-style="western"><surname>Aromataris</surname><given-names>E</given-names> </name><etal/></person-group><article-title>The revised JBI critical appraisal tool for the assessment of risk of bias for quasi-experimental studies</article-title><source>JBI Evidence Synthesis</source><year>2024</year><volume>22</volume><issue>3</issue><fpage>378</fpage><lpage>388</lpage><pub-id pub-id-type="doi">10.11124/JBIES-23-00268</pub-id></nlm-citation></ref><ref id="ref33"><label>33</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Holderried</surname><given-names>F</given-names> </name><name name-style="western"><surname>Stegemann-Philipps</surname><given-names>C</given-names> </name><name name-style="western"><surname>Herrmann-Werner</surname><given-names>A</given-names> </name><etal/></person-group><article-title>A language model&#x2013;powered simulated patient with automated feedback for history taking: prospective study</article-title><source>JMIR Med Educ</source><year>2024</year><volume>10</volume><issue>1</issue><fpage>e59213</fpage><pub-id pub-id-type="doi">10.2196/59213</pub-id></nlm-citation></ref><ref id="ref34"><label>34</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Holderried</surname><given-names>F</given-names> </name><name 
name-style="western"><surname>Stegemann&#x2013;Philipps</surname><given-names>C</given-names> </name><name name-style="western"><surname>Herschbach</surname><given-names>L</given-names> </name><etal/></person-group><article-title>A generative pretrained transformer (GPT)&#x2013;powered chatbot as a simulated patient to practice history taking: prospective, mixed methods study</article-title><source>JMIR Med Educ</source><year>2024</year><volume>10</volume><issue>1</issue><fpage>e53961</fpage><pub-id pub-id-type="doi">10.2196/53961</pub-id></nlm-citation></ref><ref id="ref35"><label>35</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Yamamoto</surname><given-names>A</given-names> </name><name name-style="western"><surname>Koda</surname><given-names>M</given-names> </name><name name-style="western"><surname>Ogawa</surname><given-names>H</given-names> </name><etal/></person-group><article-title>Enhancing medical interview skills through AI-simulated patient interactions: nonrandomized controlled trial</article-title><source>JMIR Med Educ</source><year>2024</year><month>09</month><day>23</day><volume>10</volume><issue>1</issue><fpage>e58753</fpage><pub-id pub-id-type="doi">10.2196/58753</pub-id><pub-id pub-id-type="medline">39312284</pub-id></nlm-citation></ref><ref id="ref36"><label>36</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Borg</surname><given-names>A</given-names> </name><name name-style="western"><surname>Georg</surname><given-names>C</given-names> </name><name name-style="western"><surname>Jobs</surname><given-names>B</given-names> </name><etal/></person-group><article-title>Virtual patient simulations using social robotics combined with large language models for clinical reasoning training in medical education: mixed methods study</article-title><source>J Med Internet 
Res</source><year>2025</year><volume>27</volume><fpage>e63312</fpage><pub-id pub-id-type="doi">10.2196/63312</pub-id></nlm-citation></ref><ref id="ref37"><label>37</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Herschbach</surname><given-names>L</given-names> </name><name name-style="western"><surname>Festl-Wietek</surname><given-names>T</given-names> </name><name name-style="western"><surname>Stegemann-Philipps</surname><given-names>C</given-names> </name><etal/></person-group><article-title>Evaluation of an AI-based chatbot providing real-time feedback in communication training for mental health care professionals: proof-of-concept observational study</article-title><source>J Med Internet Res</source><year>2025</year><month>11</month><day>28</day><volume>27</volume><fpage>e82818</fpage><pub-id pub-id-type="doi">10.2196/82818</pub-id><pub-id pub-id-type="medline">41314643</pub-id></nlm-citation></ref><ref id="ref38"><label>38</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Maquil&#x00F3;n</surname><given-names>RG</given-names> </name><name name-style="western"><surname>Uhl</surname><given-names>J</given-names> </name><name name-style="western"><surname>Schrom-Feiertag</surname><given-names>H</given-names> </name><name name-style="western"><surname>Tscheligi</surname><given-names>M</given-names> </name></person-group><article-title>Integrating GPT-based AI into virtual patients to facilitate communication training among medical first responders: usability study of mixed reality simulation</article-title><source>JMIR Form Res</source><year>2024</year><month>12</month><day>11</day><volume>8</volume><fpage>e58623</fpage><pub-id pub-id-type="doi">10.2196/58623</pub-id><pub-id pub-id-type="medline">39661979</pub-id></nlm-citation></ref><ref id="ref39"><label>39</label><nlm-citation citation-type="journal"><person-group 
person-group-type="author"><name name-style="western"><surname>Benfatah</surname><given-names>M</given-names> </name><name name-style="western"><surname>Marfak</surname><given-names>A</given-names> </name><name name-style="western"><surname>Saad</surname><given-names>E</given-names> </name><name name-style="western"><surname>Hilali</surname><given-names>A</given-names> </name><name name-style="western"><surname>Nejjari</surname><given-names>C</given-names> </name><name name-style="western"><surname>Youlyouz-Marfak</surname><given-names>I</given-names> </name></person-group><article-title>Assessing the efficacy of ChatGPT as a virtual patient in nursing simulation training: a study on nursing students&#x2019; experience</article-title><source>Teach Learn Nurs</source><year>2024</year><month>07</month><volume>19</volume><issue>3</issue><fpage>e486</fpage><lpage>e493</lpage><pub-id pub-id-type="doi">10.1016/j.teln.2024.02.005</pub-id></nlm-citation></ref><ref id="ref40"><label>40</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bonfitto</surname><given-names>GR</given-names> </name><name name-style="western"><surname>Roletto</surname><given-names>A</given-names> </name><name name-style="western"><surname>Savardi</surname><given-names>M</given-names> </name><name name-style="western"><surname>Fasulo</surname><given-names>SV</given-names> </name><name name-style="western"><surname>Catania</surname><given-names>D</given-names> </name><name name-style="western"><surname>Signoroni</surname><given-names>A</given-names> </name></person-group><article-title>Harnessing ChatGPT dialogues to address claustrophobia in MRI - a radiographers&#x2019; education perspective</article-title><source>Radiography (Lond)</source><year>2024</year><month>05</month><volume>30</volume><issue>3</issue><fpage>737</fpage><lpage>744</lpage><pub-id pub-id-type="doi">10.1016/j.radi.2024.02.015</pub-id></nlm-citation></ref><ref 
id="ref41"><label>41</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Br&#x00FC;gge</surname><given-names>E</given-names> </name><name name-style="western"><surname>Ricchizzi</surname><given-names>S</given-names> </name><name name-style="western"><surname>Arenbeck</surname><given-names>M</given-names> </name><etal/></person-group><article-title>Large language models improve clinical decision making of medical students through patient simulation and structured feedback: a randomized controlled trial</article-title><source>BMC Med Educ</source><year>2024</year><month>11</month><day>28</day><volume>24</volume><issue>1</issue><fpage>1391</fpage><pub-id pub-id-type="doi">10.1186/s12909-024-06399-7</pub-id><pub-id pub-id-type="medline">39609823</pub-id></nlm-citation></ref><ref id="ref42"><label>42</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sardesai</surname><given-names>N</given-names> </name><name name-style="western"><surname>Russo</surname><given-names>P</given-names> </name><name name-style="western"><surname>Martin</surname><given-names>J</given-names> </name><name name-style="western"><surname>Sardesai</surname><given-names>A</given-names> </name></person-group><article-title>Utilizing generative conversational artificial intelligence to create simulated patient encounters: a pilot study for anaesthesia training</article-title><source>Postgrad Med J</source><year>2024</year><month>03</month><day>18</day><volume>100</volume><issue>1182</issue><fpage>237</fpage><lpage>241</lpage><pub-id pub-id-type="doi">10.1093/postmj/qgad137</pub-id><pub-id pub-id-type="medline">38240054</pub-id></nlm-citation></ref><ref id="ref43"><label>43</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Chinwong</surname><given-names>D</given-names> </name><name 
name-style="western"><surname>Penthinapong</surname><given-names>T</given-names> </name><name name-style="western"><surname>Chinwong</surname><given-names>S</given-names> </name></person-group><article-title>Integrating ChatGPT for smoking cessation counseling practice in pharmacy education: a single group quasi-experimental study</article-title><source>Tob Induc Dis</source><year>2025</year><volume>23</volume><issue>November</issue><fpage>1</fpage><lpage>12</lpage><pub-id pub-id-type="doi">10.18332/tid/211706</pub-id></nlm-citation></ref><ref id="ref44"><label>44</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kim</surname><given-names>J</given-names> </name><name name-style="western"><surname>Won</surname><given-names>J</given-names> </name><name name-style="western"><surname>Lee</surname><given-names>Y</given-names> </name></person-group><article-title>Use of a generative pre-trained transformer-based virtual patient for health assessment and communication training in nursing education: a mixed-methods study</article-title><source>Nurse Educ Pract</source><year>2025</year><month>10</month><volume>88</volume><fpage>104536</fpage><pub-id pub-id-type="doi">10.1016/j.nepr.2025.104536</pub-id></nlm-citation></ref><ref id="ref45"><label>45</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Luo</surname><given-names>MJ</given-names> </name><name name-style="western"><surname>Bi</surname><given-names>S</given-names> </name><name name-style="western"><surname>Pang</surname><given-names>J</given-names> </name><etal/></person-group><article-title>A large language model digital patient system enhances ophthalmology history taking skills</article-title><source>npj Digit Med</source><year>2025</year><volume>8</volume><issue>1</issue><fpage>502</fpage><pub-id 
pub-id-type="doi">10.1038/s41746-025-01841-6</pub-id></nlm-citation></ref><ref id="ref46"><label>46</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Mool</surname><given-names>A</given-names> </name><name name-style="western"><surname>Schmid</surname><given-names>J</given-names> </name><name name-style="western"><surname>Johnston</surname><given-names>T</given-names> </name><etal/></person-group><article-title>Using generative AI to simulate patient history-taking in a problem-based learning tutorial: a mixed-methods study</article-title><source>Tech Know Learn</source><year>2026</year><fpage>1</fpage><lpage>18</lpage><pub-id pub-id-type="doi">10.1007/s10758-025-09929-4</pub-id></nlm-citation></ref><ref id="ref47"><label>47</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Yu</surname><given-names>H</given-names> </name><name name-style="western"><surname>Zhou</surname><given-names>J</given-names> </name><name name-style="western"><surname>Li</surname><given-names>L</given-names> </name><etal/></person-group><article-title>Simulated patient systems powered by large language model-based AI agents offer potential for transforming medical education</article-title><source>Commun Med (Lond)</source><year>2025</year><month>12</month><day>19</day><volume>6</volume><issue>1</issue><fpage>27</fpage><pub-id pub-id-type="doi">10.1038/s43856-025-01283-x</pub-id><pub-id pub-id-type="medline">41420084</pub-id></nlm-citation></ref><ref id="ref48"><label>48</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Kolb</surname><given-names>DA</given-names> </name></person-group><source>Experiential Learning: Experience as the Source of Learning and Development</source><year>2014</year><publisher-name>FT Press</publisher-name><pub-id 
pub-id-type="other">0133892506</pub-id></nlm-citation></ref><ref id="ref49"><label>49</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Sweller</surname><given-names>J</given-names> </name></person-group><article-title>Cognitive load theory</article-title><source>Psychology of Learning and Motivation</source><year>2011</year><volume>55</volume><publisher-name>Academic Press</publisher-name><fpage>37</fpage><lpage>76</lpage><pub-id pub-id-type="doi">10.1016/B978-0-12-387691-1.00002-8</pub-id></nlm-citation></ref><ref id="ref50"><label>50</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Fraser</surname><given-names>KL</given-names> </name><name name-style="western"><surname>Ayres</surname><given-names>P</given-names> </name><name name-style="western"><surname>Sweller</surname><given-names>J</given-names> </name></person-group><article-title>Cognitive load theory for the design of medical simulations</article-title><source>Simul Healthcare</source><year>2015</year><volume>10</volume><issue>5</issue><fpage>295</fpage><lpage>307</lpage><pub-id pub-id-type="doi">10.1097/SIH.0000000000000097</pub-id></nlm-citation></ref><ref id="ref51"><label>51</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Thomas</surname><given-names>G</given-names> </name></person-group><article-title>What&#x2019;s the use of theory?</article-title><source>Harvard Educ Rev</source><year>1997</year><month>01</month><day>1</day><volume>67</volume><issue>1</issue><fpage>75</fpage><lpage>105</lpage><pub-id pub-id-type="doi">10.17763/haer.67.1.1x807532771w5u48</pub-id></nlm-citation></ref><ref id="ref52"><label>52</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Joseph</surname><given-names>J</given-names> 
</name></person-group><article-title>Algorithmic bias in public health AI: a silent threat to equity in low-resource settings</article-title><source>Front Public Health</source><year>2025</year><volume>13</volume><fpage>1643180</fpage><pub-id pub-id-type="doi">10.3389/fpubh.2025.1643180</pub-id><pub-id pub-id-type="medline">40771228</pub-id></nlm-citation></ref><ref id="ref53"><label>53</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Panch</surname><given-names>T</given-names> </name><name name-style="western"><surname>Mattie</surname><given-names>H</given-names> </name><name name-style="western"><surname>Celi</surname><given-names>LA</given-names> </name></person-group><article-title>The &#x201C;inconvenient truth&#x201D; about AI in healthcare</article-title><source>npj Digit Med</source><year>2019</year><volume>2</volume><issue>1</issue><fpage>77</fpage><pub-id pub-id-type="doi">10.1038/s41746-019-0155-4</pub-id><pub-id pub-id-type="medline">31453372</pub-id></nlm-citation></ref><ref id="ref54"><label>54</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Al-kfairy</surname><given-names>M</given-names> </name><name name-style="western"><surname>Mustafa</surname><given-names>D</given-names> </name><name name-style="western"><surname>Kshetri</surname><given-names>N</given-names> </name><name name-style="western"><surname>Insiew</surname><given-names>M</given-names> </name><name name-style="western"><surname>Alfandi</surname><given-names>O</given-names> </name></person-group><article-title>Ethical challenges and solutions of generative AI: an interdisciplinary perspective</article-title><source>Informatics (MDPI)</source><year>2024</year><volume>11</volume><issue>3</issue><fpage>58</fpage><pub-id pub-id-type="doi">10.3390/informatics11030058</pub-id></nlm-citation></ref><ref id="ref55"><label>55</label><nlm-citation 
citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Duffourc</surname><given-names>MN</given-names> </name><name name-style="western"><surname>Gerke</surname><given-names>S</given-names> </name><name name-style="western"><surname>Kollnig</surname><given-names>K</given-names> </name></person-group><article-title>Privacy of personal data in the generative AI data lifecycle</article-title><source>NYU J Intell Prop Ent L</source><year>2024</year><access-date>2026-04-21</access-date><volume>13</volume><issue>2</issue><fpage>219</fpage><lpage>268</lpage><comment><ext-link ext-link-type="uri" xlink:href="https://jipel.law.nyu.edu/wp-content/uploads/2024/07/JIPEL-Volume-13-Number-2-Duffourc-Gerke-Kollnig.pdf">https://jipel.law.nyu.edu/wp-content/uploads/2024/07/JIPEL-Volume-13-Number-2-Duffourc-Gerke-Kollnig.pdf</ext-link></comment></nlm-citation></ref><ref id="ref56"><label>56</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Tran</surname><given-names>C</given-names> </name><name name-style="western"><surname>Hryciw</surname><given-names>BN</given-names> </name><name name-style="western"><surname>Moore</surname><given-names>SW</given-names> </name><name name-style="western"><surname>Chaput</surname><given-names>A</given-names> </name><name name-style="western"><surname>Seely</surname><given-names>AJE</given-names> </name></person-group><article-title>Perceptions and use of generative artificial intelligence in medical students: a multicenter survey</article-title><source>J Med Educ Curric Dev</source><year>2025</year><volume>12</volume><fpage>23821205251391969</fpage><pub-id pub-id-type="doi">10.1177/23821205251391969</pub-id><pub-id pub-id-type="medline">41181167</pub-id></nlm-citation></ref><ref id="ref57"><label>57</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Sterne</surname><given-names>JAC</given-names> </name><name name-style="western"><surname>Savovi&#x0107;</surname><given-names>J</given-names> </name><name name-style="western"><surname>Page</surname><given-names>MJ</given-names> </name><etal/></person-group><article-title>RoB 2: a revised tool for assessing risk of bias in randomised trials</article-title><source>BMJ</source><year>2019</year><month>08</month><day>28</day><volume>366</volume><fpage>l4898</fpage><pub-id pub-id-type="doi">10.1136/bmj.l4898</pub-id><pub-id pub-id-type="medline">31462531</pub-id></nlm-citation></ref><ref id="ref58"><label>58</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sterne</surname><given-names>JA</given-names> </name><name name-style="western"><surname>Hern&#x00E1;n</surname><given-names>MA</given-names> </name><name name-style="western"><surname>Reeves</surname><given-names>BC</given-names> </name><etal/></person-group><article-title>ROBINS-I: a tool for assessing risk of bias in non-randomised studies of interventions</article-title><source>BMJ</source><year>2016</year><month>10</month><day>12</day><volume>355</volume><fpage>i4919</fpage><pub-id pub-id-type="doi">10.1136/bmj.i4919</pub-id><pub-id pub-id-type="medline">27733354</pub-id></nlm-citation></ref><ref id="ref59"><label>59</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Guyatt</surname><given-names>GH</given-names> </name><name name-style="western"><surname>Oxman</surname><given-names>AD</given-names> </name><name name-style="western"><surname>Vist</surname><given-names>GE</given-names> </name><etal/></person-group><article-title>GRADE: an emerging consensus on rating quality of evidence and strength of 
recommendations</article-title><source>BMJ</source><year>2008</year><month>04</month><day>26</day><volume>336</volume><issue>7650</issue><fpage>924</fpage><lpage>926</lpage><pub-id pub-id-type="doi">10.1136/bmj.39489.470347.AD</pub-id><pub-id pub-id-type="medline">18436948</pub-id></nlm-citation></ref><ref id="ref60"><label>60</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ioannidis</surname><given-names>JPA</given-names> </name></person-group><article-title>Why most published research findings are false</article-title><source>PLoS Med</source><year>2005</year><month>08</month><volume>2</volume><issue>8</issue><fpage>e124</fpage><pub-id pub-id-type="doi">10.1371/journal.pmed.0020124</pub-id><pub-id pub-id-type="medline">16060722</pub-id></nlm-citation></ref><ref id="ref61"><label>61</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Button</surname><given-names>KS</given-names> </name><name name-style="western"><surname>Ioannidis</surname><given-names>JPA</given-names> </name><name name-style="western"><surname>Mokrysz</surname><given-names>C</given-names> </name><etal/></person-group><article-title>Power failure: why small sample size undermines the reliability of neuroscience</article-title><source>Nat Rev Neurosci</source><year>2013</year><month>05</month><volume>14</volume><issue>5</issue><fpage>365</fpage><lpage>376</lpage><pub-id pub-id-type="doi">10.1038/nrn3475</pub-id></nlm-citation></ref><ref id="ref62"><label>62</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hr&#x00F3;bjartsson</surname><given-names>A</given-names> </name><name name-style="western"><surname>G&#x00F8;tzsche</surname><given-names>PC</given-names> </name></person-group><article-title>Placebo interventions for all clinical conditions</article-title><source>Cochrane Database 
Syst Rev</source><year>2010</year><month>01</month><day>20</day><volume>2010</volume><issue>1</issue><fpage>CD003974</fpage><pub-id pub-id-type="doi">10.1002/14651858.CD003974.pub3</pub-id><pub-id pub-id-type="medline">20091554</pub-id></nlm-citation></ref><ref id="ref63"><label>63</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Rosenman</surname><given-names>R</given-names> </name><name name-style="western"><surname>Tennekoon</surname><given-names>V</given-names> </name><name name-style="western"><surname>Hill</surname><given-names>LG</given-names> </name></person-group><article-title>Measuring bias in self-reported data</article-title><source>Int J Behav Healthc Res</source><year>2011</year><month>10</month><volume>2</volume><issue>4</issue><fpage>320</fpage><lpage>332</lpage><pub-id pub-id-type="doi">10.1504/IJBHR.2011.043414</pub-id><pub-id pub-id-type="medline">25383095</pub-id></nlm-citation></ref><ref id="ref64"><label>64</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Schulz</surname><given-names>KF</given-names> </name><name name-style="western"><surname>Grimes</surname><given-names>DA</given-names> </name></person-group><article-title>Sample size calculations in randomised trials: mandatory and mystical</article-title><source>Lancet</source><year>2005</year><month>04</month><volume>365</volume><issue>9467</issue><fpage>1348</fpage><lpage>1353</lpage><pub-id pub-id-type="doi">10.1016/S0140-6736(05)61034-3</pub-id></nlm-citation></ref><ref id="ref65"><label>65</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>DeVellis</surname><given-names>RF</given-names> </name><name name-style="western"><surname>Thorpe</surname><given-names>CT</given-names> </name></person-group><source>Scale Development: Theory and 
Applications</source><year>2021</year><edition>5</edition><publisher-name>SAGE Publications, Inc</publisher-name><pub-id pub-id-type="other">9781544379340</pub-id></nlm-citation></ref><ref id="ref66"><label>66</label><nlm-citation citation-type="report"><person-group person-group-type="author"><name name-style="western"><surname>Campbell</surname><given-names>DT</given-names> </name><name name-style="western"><surname>Stanley</surname><given-names>JC</given-names> </name></person-group><article-title>Experimental and quasi-experimental designs for research</article-title><year>1963</year><access-date>2026-04-21</access-date><publisher-name>Houghton Mifflin Company</publisher-name><comment><ext-link ext-link-type="uri" xlink:href="https://www.sfu.ca/~palys/Campbell&#x0026;Stanley-1959-Exptl&#x0026;QuasiExptlDesignsForResearch.pdf">https://www.sfu.ca/~palys/Campbell&#x0026;Stanley-1959-Exptl&#x0026;QuasiExptlDesignsForResearch.pdf</ext-link></comment></nlm-citation></ref><ref id="ref67"><label>67</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Cook</surname><given-names>TD</given-names> </name><name name-style="western"><surname>Campbell</surname><given-names>DT</given-names> </name></person-group><source>Quasi-Experimentation: Design and Analysis Issues for Field Settings</source><year>1979</year><publisher-name>Houghton Mifflin Company</publisher-name><pub-id pub-id-type="other">0395307902</pub-id></nlm-citation></ref><ref id="ref68"><label>68</label><nlm-citation citation-type="web"><person-group person-group-type="author"><name name-style="western"><surname>Resnik</surname><given-names>DB</given-names> </name></person-group><article-title>What is ethics in research &#x0026; why is it important?</article-title><source>National Institute of Environmental Health Sciences</source><year>2020</year><month>12</month><day>23</day><access-date>2026-04-21</access-date><comment><ext-link 
ext-link-type="uri" xlink:href="https://www.niehs.nih.gov/research/resources/bioethics/whatis">https://www.niehs.nih.gov/research/resources/bioethics/whatis</ext-link></comment></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>Synthesis Without Meta-analysis (SWiM) reporting items.</p><media xlink:href="jmir_v28i1e82756_app1.docx" xlink:title="DOCX File, 20 KB"/></supplementary-material><supplementary-material id="app2"><label>Multimedia Appendix 2</label><p>Search strategy.</p><media xlink:href="jmir_v28i1e82756_app2.docx" xlink:title="DOCX File, 26 KB"/></supplementary-material><supplementary-material id="app3"><label>Checklist 1</label><p>PRISMA 2020 expanded checklist.</p><media xlink:href="jmir_v28i1e82756_app3.docx" xlink:title="DOCX File, 53 KB"/></supplementary-material></app-group></back></article>