<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="review-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id><journal-id journal-id-type="publisher-id">jmir</journal-id><journal-id journal-id-type="index">1</journal-id><journal-title>Journal of Medical Internet Research</journal-title><abbrev-journal-title>J Med Internet Res</abbrev-journal-title><issn pub-type="epub">1438-8871</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v28i1e93354</article-id><article-id pub-id-type="doi">10.2196/93354</article-id><article-categories><subj-group subj-group-type="heading"><subject>Review</subject></subj-group></article-categories><title-group><article-title>Applications of DeepSeek in Medicine: Bibliometric Analysis and Scoping Review</article-title></title-group><contrib-group><contrib contrib-type="author" equal-contrib="yes"><name name-style="western"><surname>Zhang</surname><given-names>Haoran</given-names></name><degrees>MS</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff2">2</xref><xref ref-type="fn" rid="equal-contrib1">*</xref></contrib><contrib contrib-type="author" equal-contrib="yes"><name name-style="western"><surname>Wang</surname><given-names>Dawei</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff3">3</xref><xref ref-type="fn" rid="equal-contrib1">*</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Xu</surname><given-names>Yanliang</given-names></name><degrees>BSc</degrees><xref ref-type="aff" rid="aff4">4</xref></contrib><contrib contrib-type="author"><name 
name-style="western"><surname>Han</surname><given-names>Shuming</given-names></name><degrees>MS</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Wang</surname><given-names>Guangxin</given-names></name><degrees>MD, PhD</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib></contrib-group><aff id="aff1"><institution>School of Clinical Medicine, Shandong Second Medical University</institution><addr-line>Weifang</addr-line><addr-line>Shandong</addr-line><country>China</country></aff><aff id="aff2"><institution>Shandong Innovation Center of Intelligent Diagnostic Technology, Central Hospital Affiliated to Shandong First Medical University</institution><addr-line>105 Jiefang Road</addr-line><addr-line>Jinan</addr-line><addr-line>Shandong</addr-line><country>China</country></aff><aff id="aff3"><institution>Key Laboratory of Endocrine Glucose &#x0026; Lipids Metabolism and Brain Aging, Ministry of Education; Department of Endocrinology, Shandong Provincial Hospital Affiliated to Shandong First Medical University</institution><addr-line>Jinan</addr-line><addr-line>Shandong</addr-line><country>China</country></aff><aff id="aff4"><institution>Library, Shandong Second Medical University</institution><addr-line>Weifang</addr-line><addr-line>Shandong</addr-line><country>China</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Coristine</surname><given-names>Andrew</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Liu</surname><given-names>Fenglin</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Chen</surname><given-names>Sully</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Nazi</surname><given-names>Zabir 
Al</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Guangxin Wang, MD, PhD, Shandong Innovation Center of Intelligent Diagnostic Technology, Central Hospital Affiliated to Shandong First Medical University, 105 Jiefang Road, Jinan, Shandong, 250013, China, 86 531 55865152; <email>y22183@email.sdfmu.edu.cn</email></corresp><fn fn-type="equal" id="equal-contrib1"><label>*</label><p>these authors contributed equally</p></fn></author-notes><pub-date pub-type="collection"><year>2026</year></pub-date><pub-date pub-type="epub"><day>15</day><month>6</month><year>2026</year></pub-date><volume>28</volume><elocation-id>e93354</elocation-id><history><date date-type="received"><day>11</day><month>02</month><year>2026</year></date><date date-type="rev-recd"><day>19</day><month>05</month><year>2026</year></date><date date-type="accepted"><day>20</day><month>05</month><year>2026</year></date></history><copyright-statement>&#x00A9; Haoran Zhang, Dawei Wang, Yanliang Xu, Shuming Han, Guangxin Wang. Originally published in the Journal of Medical Internet Research (<ext-link ext-link-type="uri" xlink:href="https://www.jmir.org">https://www.jmir.org</ext-link>), 15.6.2026. </copyright-statement><copyright-year>2026</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research (ISSN 1438-8871), is properly cited. 
The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://www.jmir.org/">https://www.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://www.jmir.org/2026/1/e93354"/><abstract><sec><title>Background</title><p>The integration of large language models (LLMs) into medicine has reshaped health care delivery, education, and research. Although proprietary models face challenges such as data privacy, regulation, and adaptability, DeepSeek, an open-source LLM, has emerged as a customizable and cost-effective alternative with significant potential for clinical and operational applications. However, the rapid expansion of research in this area necessitates a systematic mapping of its landscape, applications, and challenges.</p></sec><sec><title>Objective</title><p>This study combined bibliometric analysis with a scoping review to systematically map and characterize the literature on DeepSeek&#x2019;s medical applications. The aims were to (1) analyze publication trends, leading contributors, and research themes and (2) identify primary application domains, strengths, limitations, and future directions.</p></sec><sec sec-type="methods"><title>Methods</title><p>Following the framework by Arksey and O&#x2019;Malley and the PRISMA-ScR (Preferred Reporting Items for Systematic Reviews and Meta-Analyses Extension for Scoping Reviews) guidelines, a systematic search of PubMed, Web of Science, and Scopus was conducted for literature published from January 20, 2025, to November 30, 2025. Bibliometric analysis was then used to quantify publication trends, productivity, and research themes across 371 papers. 
The scoping review thematically synthesized the applications, strengths, and limitations of 353 original articles.</p></sec><sec sec-type="results"><title>Results</title><p>The publication output showed a progressive increase, with China (n=163), Turkey (n=52), and the United States (n=48) as leading contributors. Keyword co-occurrence analysis formed 7 clusters; the 3 most frequent keywords were &#x201C;large language model,&#x201D; &#x201C;artificial intelligence,&#x201D; and &#x201C;patient education.&#x201D; DeepSeek has shown promising yet preliminary performance across multiple domains, including patient education, clinical decision support, medical education, workflow optimization, and medical research. The evidence base remains predominantly low in quality, with 66.6% (235/353) of original articles classified as low-quality evidence, consisting largely of unvalidated benchmarking, simulated cases, and single-center retrospective analyses. Only 6.8% (24/353) of studies met the criteria to be considered high quality, and prospective randomized trials assessing patient-relevant outcomes were notably absent.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>Publications on DeepSeek&#x2019;s medical applications increased progressively from January 2025 through November 2025, with China, Turkey, and the United States as the leading contributors. The scoping review found that DeepSeek has been evaluated across 5 domains (patient education, clinical decision support, medical education, workflow optimization, and research), with variable but often competitive performance relative to proprietary models. Strengths included readability, diagnostic accuracy in select specialties, cost-efficiency, and local deployability. Limitations included inconsistent cross-specialty performance, hallucinations, ethical concerns, data privacy issues, and regulatory gaps. 
The evidence base is predominantly low-quality and simulation-based, with few prospective trials or randomized controlled trials. These findings indicate that DeepSeek&#x2019;s clinical readiness varies, and future research should address prospective validation, multimodal capabilities, bias mitigation, human oversight, and equitable access.</p></sec></abstract><kwd-group><kwd>DeepSeek</kwd><kwd>large language model</kwd><kwd>artificial intelligence in medicine</kwd><kwd>clinical decision support</kwd><kwd>medical education</kwd><kwd>scoping review</kwd><kwd>biomedical ethics</kwd><kwd>PRISMA</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>The integration of artificial intelligence (AI), particularly large language models (LLMs), into medicine has prompted a paradigm shift in health care delivery, education, and research. LLMs, such as OpenAI&#x2019;s GPT series, have demonstrated considerable capabilities for processing complex medical data, supporting clinical decision-making, and improving patient communication. However, the widespread adoption of proprietary LLMs in clinical settings faces substantial challenges, including data privacy concerns, regulatory constraints, and limited adaptability to institutional requirements [<xref ref-type="bibr" rid="ref1">1</xref>]. In this context, DeepSeek, an open-source LLM developed by Hangzhou DeepSeek Artificial Intelligence Basic Technology Research Co Ltd, has emerged as a promising alternative, distinguished by its customizability, cost-effectiveness, and alignment with data governance standards [<xref ref-type="bibr" rid="ref1">1</xref>-<xref ref-type="bibr" rid="ref3">3</xref>]. 
This model represents a significant advancement in AI, particularly for its sophisticated reasoning capabilities and its impact on AI research and applications.</p><p>DeepSeek&#x2019;s architecture, especially in reasoning-enhanced iterations such as DeepSeek-R1, incorporates innovative training approaches, including Group Relative Policy Optimization (GRPO). This rule-based reinforcement learning paradigm, which functions without task-specific supervised fine-tuning during the reasoning alignment phase and builds upon a pretrained base model, fosters emergent reasoning behaviors that are particularly valuable for complex medical reasoning tasks [<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref5">5</xref>]. DeepSeek&#x2019;s open-weight nature enables local deployment, making the model particularly attractive in health care settings where data security and privacy are paramount [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref6">6</xref>]. Since its release, DeepSeek and its associated intelligent agents have been implemented in multiple tertiary hospitals across China, resulting in measurable improvements in clinical and operational workflows, including patient follow-up, imaging analysis, and administrative automation [<xref ref-type="bibr" rid="ref7">7</xref>-<xref ref-type="bibr" rid="ref9">9</xref>]. Such real-world implementations underscore the potential for redefining AI-driven health care delivery.</p><p>The growing corpus of studies evaluating DeepSeek&#x2019;s medical applications has revealed several strengths. In clinical diagnostics, DeepSeek-R1 achieved a diagnostic accuracy comparable to that of GPT-4 in complex clinicopathological cases [<xref ref-type="bibr" rid="ref10">10</xref>]. In specialized areas, such as ophthalmology, it has exhibited diagnostic and management performance on par with OpenAI o1 while reducing token-related costs by approximately 15-fold [<xref ref-type="bibr" rid="ref11">11</xref>]. 
Moreover, DeepSeek excels in Chinese-language medical contexts, outperforming ChatGPT at delivering prostate cancer radiotherapy information in Chinese and demonstrating superior results on Chinese medical licensing examinations [<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref13">13</xref>]. Beyond clinical decision support, DeepSeek shows promise in medical education, patient communication, and administrative tasks, with documented deployments across multiple Chinese tertiary hospitals supporting applications ranging from imaging interpretation to automated administrative workflows [<xref ref-type="bibr" rid="ref9">9</xref>]. However, these promising benchmarking results warrant further examination in real-world clinical settings, which are now emerging primarily in China.</p><p>The rapid integration of DeepSeek into clinical practice, particularly within Chinese hospital systems [<xref ref-type="bibr" rid="ref2">2</xref>,<xref ref-type="bibr" rid="ref9">9</xref>], underscores the necessity for a thorough evaluation of its applications, limitations, and future directions. The existing literature lacks a comprehensive assessment of publication trends and emerging research fronts in this rapidly evolving domain. Evidence remains fragmented across medical specialties, and the heterogeneous methodologies and outcomes limit a holistic understanding of the model&#x2019;s clinical utility, safety profile, and readiness for broader implementation. Therefore, a comprehensive synthesis of available evidence is essential to guide health care institutions, policymakers, and developers in evaluating DeepSeek&#x2019;s realistic capabilities, optimal deployment strategies, and associated risks.</p><p>To address this gap and systematically map the research landscape, this study adopted an integrated methodological approach that combined bibliometric analysis with a scoping review. 
Bibliometric analysis quantitatively characterizes the field at the macro level, examining publication trends over time, core authors and institutions, high-frequency keywords, and journal distributions. This enables the objective identification of research hot spots and evolutionary trajectories [<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref15">15</xref>]. Simultaneously, a scoping review is a systematic methodology designed to map key concepts, evidence types, and knowledge gaps within a broad or emerging field. Rather than synthesizing evidence for definitive conclusions, it uses qualitative or descriptive methods to identify existing research themes, methodological characteristics, and underexplored areas, thereby clarifying the overall research landscape [<xref ref-type="bibr" rid="ref16">16</xref>]. Given that literature on DeepSeek in medicine is growing rapidly and includes highly heterogeneous publications, such as proof-of-concept studies, preclinical research, preliminary clinical trials, and technical descriptions, a scoping review is more suitable than a systematic review for this context, as it focuses on comprehensively mapping the domain without mandating formal quality appraisal. The combination of these two methods leveraged their complementary strengths: Bibliometric analysis provides an objective, structured quantitative overview, while the scoping review delivers a nuanced, contextualized conceptual map. This integrated analysis provided a more powerful and multidimensional understanding of the field&#x2019;s scope, developmental dynamics, and future directions from both quantitative and qualitative perspectives.</p><p>Guided by this integrated approach, the study was structured as follows. First, a bibliometric analysis was conducted to examine relevant original articles and reviews, addressing the following questions: (1) What are the volume, growth trajectory, and geographic distribution of publications? 
(2) Which countries/regions, institutions, and authors are leading the research? and (3) What are the key research themes and their evolution? Second, a scoping review was performed to critically evaluate the literature content, focusing on the following questions: What are the primary medical application domains of DeepSeek, and how do trends vary across different health care fields? Finally, the discussion synthesizes findings from both methods to highlight implementation challenges, identify major research gaps, and suggest future directions for the effective integration of DeepSeek into global health care systems.</p></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Overview</title><p>This study used an integrated approach that combined bibliometric analysis and a scoping review to provide complementary insights. The bibliometric method examined the current application of DeepSeek in medicine from multiple dimensions, analyzed researcher characteristics and journal distributions, and identified research hot spots and trends. The bibliometric analysis was conducted based on the framework proposed by Cobo et al [<xref ref-type="bibr" rid="ref17">17</xref>], following the guidelines for reporting bibliometric reviews of biomedical literature (BIBLIO) [<xref ref-type="bibr" rid="ref18">18</xref>]. This scoping review systematically extracted and synthesized the applications, challenges, and future research directions of DeepSeek in medicine. 
The study was conducted according to the framework by Arksey and O&#x2019;Malley [<xref ref-type="bibr" rid="ref19">19</xref>] and reported following the PRISMA-ScR (Preferred Reporting Items for Systematic Reviews and Meta-Analyses Extension for Scoping Reviews) guidelines (<xref ref-type="supplementary-material" rid="app4">Checklist 1</xref>) [<xref ref-type="bibr" rid="ref20">20</xref>].</p></sec><sec id="s2-2"><title>Databases, Search Strategy, and Screening Process</title><p>To ensure a comprehensive retrieval of the literature on the applications of DeepSeek in medicine, a systematic search was conducted on December 16, 2025, in PubMed, Web of Science Core Collection (WoSCC), and Scopus. The search strategy (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>) used both controlled vocabularies (MeSH, Web of Science Categories, and SUBJAREA) and free-text terms tailored to each database to optimize retrieval.</p><p>Given that the public release of DeepSeek&#x2019;s reasoning model, DeepSeek-R1, on January 20, 2025 [<xref ref-type="bibr" rid="ref21">21</xref>,<xref ref-type="bibr" rid="ref22">22</xref>], marked the beginning of subsequent research into its applications, including in medicine, the search encompassed the period from January 20, 2025, to November 30, 2025.</p><p>To ensure comprehensive retrieval, the inclusion criteria were as follows: (1) studies investigating the application of DeepSeek in medicine, (2) document types limited to original articles and reviews for bibliometric analysis and original articles only for scoping review, (3) studies published in peer-reviewed academic journals, and (4) no language restrictions.</p><p>The exclusion criteria were as follows: (1) duplicate publications; (2) literature that proposed only speculative or hypothetical uses without substantive analysis or findings; (3) non-peer-reviewed journal items, including books, editorials, preprints, commentaries, conference abstracts, case 
reports, and retracted articles; and (4) studies with insufficient information for bibliometric analysis or whose full text was unavailable for in-depth content extraction during the scoping review.</p><p>After receiving professional training, two authors (HZ and DW) independently screened the titles and abstracts and excluded irrelevant studies based on the aforementioned criteria. The interrater agreement was almost perfect (Cohen &#x03BA;=0.93). Any disagreements during screening were resolved through discussion or, when necessary, arbitration by a third reviewer (GW).</p></sec><sec id="s2-3"><title>Bibliometric Analysis</title><p>The final bibliometric analysis included 371 papers. Full records of the selected publications were exported and stored in Excel 2021 (Microsoft Corp) and EndNote Desktop (Clarivate). Bibliographic metadata such as authors&#x2019; names, affiliations, countries/regions, and keywords were standardized in a uniform format.</p><p>Excel 2021 was used to generate tables highlighting the top 10 authors, institutions, and countries/regions based on their publication output, whereas VOSviewer (version 1.6.19) was used for data visualization of bibliometric mapping, including keyword co-occurrence analysis. Keyword co-occurrence analysis examined the fundamental characteristics of keywords, such as their frequency and temporal evolution. This method helped identify research hot spots and track developmental trends within specialized fields. The three common types of visualizations used in the keyword co-occurrence analysis were the network, density, and overlay maps. In the network map, nodes represented keywords, and the connecting lines represented keyword co-occurrence relationships. The size of a node indicated its frequency, the thickness of a line represented the strength of co-occurrence, and the nodes were clustered together by color to reveal distinct research themes or subfields. 
The overlay map chronologically visualized the keyword trajectories by assigning chromatic codes corresponding to the computationally derived average publication years (APYs). The density map emphasized the &#x201C;research density&#x201D; or concentration of keywords in the knowledge landscape. Areas with numerous closely located keywords appeared as more intensely colored regions, such as purple, indicating core well-developed research fronts. Lighter-colored areas, such as blue or white, represented sparser, potentially peripheral, or emerging topics. The centrality of keywords, which reflects their capacity to bridge different parts of the research network, was derived using CiteSpace (version 7.0.0).</p></sec><sec id="s2-4"><title>Scoping Review</title><p>This scoping review included a total of 353 publications. A data extraction form was created using Excel to extract in-depth content from the papers. This form included items such as paper title, research objectives, key findings, research design types, DeepSeek&#x2019;s strengths, limitations and challenges, future recommendations, DeepSeek model version, quality tier, and application areas. It should be noted that, although quality assessment is not obligatory for scoping reviews, the methodological quality of all included studies was categorized into 3 tiers (high, moderate, and low) based on the criteria (<xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>) in order to characterize the strength of the available evidence. Data extraction was conducted independently by 2 authors (HZ and DW). Both authors independently extracted data from all 353 included articles in duplicate using the data extraction form created in Excel. After independent extraction, the 2 authors compared their results. Disagreements were resolved through discussion or by consulting a third author (GW) when consensus could not be reached. 
The extracted data (<xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref>) were then critically analyzed and organized thematically to address the research question, thereby mapping the key application areas of DeepSeek in medicine. The discussion section elaborates on the challenges, research gaps, and future work for the application of DeepSeek in the medical field.</p></sec><sec id="s2-5"><title>Ethical Considerations</title><p>Since this study was a bibliometric analysis and scoping review of previously published literature, ethical approval from an ethics committee was not required.</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title>Bibliometric Analysis of DeepSeek Applications in Medicine</title><p>A systematic search of PubMed, Scopus, and WoSCC yielded 371 publications on the application of DeepSeek in medicine for bibliometric analysis (<xref ref-type="fig" rid="figure1">Figure 1</xref>). Among these, the majority (363/371, 97.8%) were categorized as original articles, while the remaining (8/371, 2.2%) were reviews. In terms of publication languages, 358 papers were written in English, and 13 were written in Chinese.</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>The diagram depicting the paper selection process. WoSCC: Web of Science Core Collection.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="jmir_v28i1e93354_fig01.png"/></fig></sec><sec id="s3-2"><title>Monthly Publication Output</title><p>The monthly publication output increased progressively over time. 
From January to November 2025, the number of papers rose from 0 to 70, with the highest output (70 papers) observed in November (<xref ref-type="fig" rid="figure2">Figure 2</xref>).</p><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>Monthly count of publications on DeepSeek&#x2019;s medical applications identified in this review.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="jmir_v28i1e93354_fig02.png"/></fig></sec><sec id="s3-3"><title>Analysis of Source Journals</title><p>Of the 216 journals that published papers on the applications of DeepSeek in medicine, 12 published more than 5 papers each. The 10 most active journals collectively contributed 90 publications, accounting for 24.3% (90/371) of the total output. <italic>Cureus</italic> was the most productive journal with 19 publications, followed by <italic>Scientific Reports</italic> (n=10), <italic>BMC Oral Health</italic> (n=9), <italic>International Journal of Medical Informatics</italic> (n=9), <italic>BMC Medical Education</italic> (n=8), <italic>Frontiers in Artificial Intelligence</italic> (n=7), <italic>Frontiers in Public Health</italic> (n=7), <italic>JMIR Medical Informatics</italic> (n=7), <italic>Journal of Medical Internet Research</italic> (n=7), and <italic>Journal of Medical Systems</italic> (n=7).</p></sec><sec id="s3-4"><title>The Top 10 Authors, Institutions, and Nations/Regions Ranked by Publication Count</title><p><xref ref-type="table" rid="table1">Table 1</xref> presents the top 10 authors, institutions, and countries/regions ranked by their respective number of publications on the applications of DeepSeek in medicine.</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>The top 10 authors, organizations, and countries/regions ranked by the number of papers.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Rank</td><td align="left" 
valign="bottom" colspan="2">Authors<sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup></td><td align="left" valign="bottom" colspan="2">Organizations<sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup></td><td align="left" valign="bottom" colspan="2">Countries/Regions<sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Name</td><td align="left" valign="top">Papers, n</td><td align="left" valign="top">Name</td><td align="left" valign="top">Papers, n</td><td align="left" valign="top">Name</td><td align="left" valign="top">Papers, n</td></tr></thead><tbody><tr><td align="left" valign="top">1</td><td align="left" valign="top">Liu Y</td><td align="left" valign="top">6</td><td align="left" valign="top">Shanghai Jiao Tong University</td><td align="left" valign="top">16</td><td align="left" valign="top">China</td><td align="left" valign="top">163</td></tr><tr><td align="left" valign="top">2</td><td align="left" valign="top">Zhang J</td><td align="left" valign="top">5</td><td align="left" valign="top">Chinese Academy of<break/>Medical Sciences</td><td align="left" valign="top">10</td><td align="left" valign="top">Turkey</td><td align="left" valign="top">52</td></tr><tr><td align="left" valign="top">3</td><td align="left" valign="top">Li J</td><td align="left" valign="top">5</td><td align="left" valign="top">Sichuan University</td><td align="left" valign="top">10</td><td align="left" valign="top">United States</td><td align="left" valign="top">48</td></tr><tr><td align="left" valign="top">4</td><td align="left" valign="top">Wang J</td><td align="left" valign="top">5</td><td align="left" valign="top">Zhejiang University</td><td align="left" valign="top">9</td><td align="left" valign="top">Germany</td><td align="left" valign="top">24</td></tr><tr><td align="left" valign="top">5</td><td align="left" valign="top">Wang Y</td><td align="left" valign="top">5</td><td align="left" 
valign="top">Capital Medical University</td><td align="left" valign="top">9</td><td align="left" valign="top">India</td><td align="left" valign="top">23</td></tr><tr><td align="left" valign="top">6</td><td align="left" valign="top">Xu L</td><td align="left" valign="top">4</td><td align="left" valign="top">University of Health<break/>Sciences, Turkey</td><td align="left" valign="top">9</td><td align="left" valign="top">United Kingdom</td><td align="left" valign="top">20</td></tr><tr><td align="left" valign="top">7</td><td align="left" valign="top">Rozen WM</td><td align="left" valign="top">3</td><td align="left" valign="top">Southern Medical University</td><td align="left" valign="top">8</td><td align="left" valign="top">Italy</td><td align="left" valign="top">14</td></tr><tr><td align="left" valign="top">8</td><td align="left" valign="top">Cuomo R</td><td align="left" valign="top">3</td><td align="left" valign="top">Soochow University</td><td align="left" valign="top">7</td><td align="left" valign="top">Saudi Arabia</td><td align="left" valign="top">14</td></tr><tr><td align="left" valign="top">9</td><td align="left" valign="top">Marcaccini G</td><td align="left" valign="top">3</td><td align="left" valign="top">Sun Yat-sen University</td><td align="left" valign="top">7</td><td align="left" valign="top">Australia</td><td align="left" valign="top">9</td></tr><tr><td align="left" valign="top">10</td><td align="left" valign="top">Chen S</td><td align="left" valign="top">3</td><td align="left" valign="top">Tsinghua University</td><td align="left" valign="top">6</td><td align="left" valign="top">Canada</td><td align="left" valign="top">8</td></tr></tbody></table><table-wrap-foot><fn id="table1fn1"><p><sup>a</sup>These 3 categories are independent of each other.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s3-5"><title>Most Cited Papers on the Medical Applications of DeepSeek</title><p><xref ref-type="table" rid="table2">Table 2</xref> lists the 10 most-cited 
publications on the medical applications of DeepSeek: 9 were original articles, while 1 was a review [<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref23">23</xref>-<xref ref-type="bibr" rid="ref31">31</xref>].</p><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Top 10 most-cited publications on the medical applications of DeepSeek.</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Rank</td><td align="left" valign="bottom">Authors</td><td align="left" valign="bottom">Publication date</td><td align="left" valign="bottom">Total citations, n</td><td align="left" valign="bottom">Research focus</td></tr></thead><tbody><tr><td align="left" valign="top">1</td><td align="left" valign="top">Zhou et al [<xref ref-type="bibr" rid="ref23">23</xref>]</td><td align="left" valign="top">June 2025</td><td align="left" valign="top">50</td><td align="left" valign="top">Comparative evaluation of DeepSeek and ChatGPT models</td></tr><tr><td align="left" valign="top">2</td><td align="left" valign="top">Deng et al [<xref ref-type="bibr" rid="ref24">24</xref>]</td><td align="left" valign="top">May 2025</td><td align="left" valign="top">38</td><td align="left" valign="top">DeepSeek&#x2019;s advances, applications, and challenges across various domains, including health care</td></tr><tr><td align="left" valign="top">3</td><td align="left" valign="top">Kaygisiz and Teke [<xref ref-type="bibr" rid="ref25">25</xref>]</td><td align="left" valign="top">April 2025</td><td align="left" valign="top">29</td><td align="left" valign="top">DeepSeek&#x2019;s diagnostic performance in oral pathologies</td></tr><tr><td align="left" valign="top">4</td><td align="left" valign="top">Rasool et al [<xref ref-type="bibr" rid="ref26">26</xref>]</td><td align="left" valign="top">March 2025</td><td align="left" valign="top">28</td><td align="left" valign="top">DeepSeek&#x2019;s emotion-aware embedding fusion 
for responses</td></tr><tr><td align="left" valign="top">5</td><td align="left" valign="top">Yilmaz et al [<xref ref-type="bibr" rid="ref27">27</xref>]</td><td align="left" valign="top">April 2025</td><td align="left" valign="top">16</td><td align="left" valign="top">Comparative performance of LLMs<sup><xref ref-type="table-fn" rid="table2fn1">a</xref></sup> on oral pathology multiple-choice questions</td></tr><tr><td align="left" valign="top">6</td><td align="left" valign="top">Marcaccini et al [<xref ref-type="bibr" rid="ref28">28</xref>]</td><td align="left" valign="top">March 2025</td><td align="left" valign="top">16</td><td align="left" valign="top">DeepSeek and AI<sup><xref ref-type="table-fn" rid="table2fn2">b</xref></sup> in hand fracture management</td></tr><tr><td align="left" valign="top">7</td><td align="left" valign="top">Luo et al [<xref ref-type="bibr" rid="ref12">12</xref>]</td><td align="left" valign="top">April 2025</td><td align="left" valign="top">16</td><td align="left" valign="top">DeepSeek versus ChatGPT in multilingual prostate cancer radiotherapy</td></tr><tr><td align="left" valign="top">8</td><td align="left" valign="top">&#x00D6;zcivelek and &#x00D6;zcan [<xref ref-type="bibr" rid="ref29">29</xref>]</td><td align="left" valign="top">May 2025</td><td align="left" valign="top">15</td><td align="left" valign="top">Comparative evaluation of AI chatbots on dental and maxillofacial prostheses</td></tr><tr><td align="left" valign="top">9</td><td align="left" valign="top">G&#x00FC;ltekin et al [<xref ref-type="bibr" rid="ref30">30</xref>]</td><td align="left" valign="top">August 2025</td><td align="left" valign="top">14</td><td align="left" valign="top">Comparative evaluation of AI models for patient education</td></tr><tr><td align="left" valign="top">10</td><td align="left" valign="top">Seth et al [<xref ref-type="bibr" rid="ref31">31</xref>]</td><td align="left" valign="top">March 2025</td><td align="left" valign="top">12</td><td align="left" 
valign="top">Evaluating DeepSeek and AI in hand surgery decisions</td></tr></tbody></table><table-wrap-foot><fn id="table2fn1"><p><sup>a</sup>LLMs: large language models.</p></fn><fn id="table2fn2"><p><sup>b</sup>AI: artificial intelligence.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s3-6"><title>Keyword Co-Occurrence Analysis</title><p>A keyword co-occurrence analysis was performed to map predominant research hot spots. Synonyms were consolidated prior to analysis; specifically, &#x201C;large language model(s)&#x201D; was standardized as &#x201C;large language model,&#x201D; and &#x201C;generative artificial intelligence/AI&#x201D; was standardized as &#x201C;generative artificial intelligence.&#x201D; The top 10 keywords by frequency are listed in <xref ref-type="table" rid="table3">Table 3</xref>. Notably, &#x201C;generative artificial intelligence&#x201D; ranked seventh in frequency but third in centrality. From an initial set of 968 keywords, 41 occurring more than 4 times were included in the keyword co-occurrence analysis. These formed 7 well-defined clusters, visualized in the network map (<xref ref-type="fig" rid="figure3">Figure 3A</xref>).</p><p>The temporal overlay map (<xref ref-type="fig" rid="figure3">Figure 3B</xref>) illustrates the evolution of research focus, with keywords colored by their APYs. Purple nodes represent earlier themes, while crimson indicates more recent activity. Early research concentrated primarily on medical education. The keywords &#x201C;retrieval-augmented generation&#x201D; and &#x201C;oncology&#x201D; showed the highest APY, reflecting a rising interest in these areas.</p><p>The density map (<xref ref-type="fig" rid="figure3">Figure 3C</xref>) displays keywords according to their average frequency of occurrence. 
Crimson regions correspond to the most frequently occurring keywords, followed by blue and then white areas, in descending order.</p><table-wrap id="t3" position="float"><label>Table 3.</label><caption><p>The top 10 keywords regarding DeepSeek&#x2019;s applications in medicine.</p></caption><table id="table3" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Rank</td><td align="left" valign="bottom">Keywords</td><td align="left" valign="bottom">Frequency of occurrence, n</td><td align="left" valign="bottom">Centrality</td></tr></thead><tbody><tr><td align="left" valign="top">1</td><td align="left" valign="top">Large language model</td><td align="left" valign="top">227</td><td align="left" valign="top">1.00</td></tr><tr><td align="left" valign="top">2</td><td align="left" valign="top">Artificial intelligence</td><td align="left" valign="top">197</td><td align="left" valign="top">0.55</td></tr><tr><td align="left" valign="top">3</td><td align="left" valign="top">Patient education</td><td align="left" valign="top">30</td><td align="left" valign="top">0.02</td></tr><tr><td align="left" valign="top">4</td><td align="left" valign="top">Medical education</td><td align="left" valign="top">28</td><td align="left" valign="top">0.01</td></tr><tr><td align="left" valign="top">5</td><td align="left" valign="top">Clinical decision support</td><td align="left" valign="top">19</td><td align="left" valign="top">0.01</td></tr><tr><td align="left" valign="top">6</td><td align="left" valign="top">Machine learning</td><td align="left" valign="top">19</td><td align="left" valign="top">0.05</td></tr><tr><td align="left" valign="top">7</td><td align="left" valign="top">Generative artificial intelligence</td><td align="left" valign="top">19</td><td align="left" valign="top">0.07</td></tr><tr><td align="left" valign="top">8</td><td align="left" valign="top">Natural language processing</td><td align="left" valign="top">9</td><td align="left" 
valign="top">0.01</td></tr><tr><td align="left" valign="top">9</td><td align="left" valign="top">Prompt engineering</td><td align="left" valign="top">8</td><td align="left" valign="top">0.00</td></tr><tr><td align="left" valign="top">10</td><td align="left" valign="top">Diagnostic accuracy</td><td align="left" valign="top">8</td><td align="left" valign="top">0.03</td></tr></tbody></table></table-wrap><fig position="float" id="figure3"><label>Figure 3.</label><caption><p>Keyword co-occurrence analysis visualization using VOSviewer [<xref ref-type="bibr" rid="ref32">32</xref>,<xref ref-type="bibr" rid="ref33">33</xref>]: (A) network visualization, with keywords grouped into 6 distinct thematic clusters; (B) overlay map colored by the average publication year of each keyword, ranging from purple (earlier) to crimson (recent); and (C) density map based on keyword co-occurrence frequency, where color intensity reflects occurrence rate: crimson (highest), blue (moderate), and white (lowest).</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="jmir_v28i1e93354_fig03.png"/></fig><sec id="s3-6-1"><title>Summary of Extracted Data in the Scoping Review: Study Quality, Model Versions, Comparative Performance, and Documented Limitations</title><p>Of the 353 original articles, 24 (6.8%) met the criteria for high quality. These were primarily prospective evaluations and studies with external validation. A further 94 studies (94/353, 26.6%) were classified as moderate quality. 
The majority (235/353, 66.6%) were classified as low quality, reflecting the exploratory nature of the current evidence base, which is dominated by unvalidated benchmarking using examination questions and single-center retrospective analyses.</p><p>Analysis of DeepSeek-specific versions revealed that DeepSeek-R1 was the most frequently studied (mentioned in 197 papers, 55.8% of the 353 articles), followed by DeepSeek-V3 (114/353, 32.3%) and unspecified versions of DeepSeek (61/353, 17.3%).</p><p>A total of 283 studies compared DeepSeek with other LLMs, primarily ChatGPT, in medical applications. Among these, 126 studies (126/283, 44.5%) reported positive results in which DeepSeek outperformed or showed significant advantages; 84 studies (84/283, 29.7%) reported neutral results with comparable performance, no statistically significant difference, or mixed strengths and limitations; and 73 studies (73/283, 25.8%) reported negative results in which DeepSeek underperformed relative to other models.</p><p>DeepSeek&#x2019;s primary weaknesses included inconsistent domain performance in 61 papers, incomplete answers in 47 papers, poor readability in 42 papers, and hallucinations in 38 papers. Ethical risks, though fewer in absolute count at 57 papers, were severe; specifically, concerns regarding non-maleficence (potential patient harm) were documented in 22 papers, autonomy (privacy and informed consent) in 15 papers, beneficence (lack of empathy and impaired therapeutic relationships) in 8 papers, and justice (bias and inequity) in 12 papers. Other barriers reported in 55 papers further hindered clinical adoption.</p></sec><sec id="s3-6-2"><title>Application Domains of DeepSeek in Medicine</title><p>Based on the scoping review of 353 full-text papers, the medical applications of DeepSeek can be summarized into the primary domains discussed in the following sections. 
Because a single study often evaluated DeepSeek in multiple domains, the sum of article counts across these domains exceeds 353.</p><sec id="s3-6-2-1"><title>DeepSeek in Patient Education and Communication</title><p>The applications of DeepSeek in patient education and communication were addressed in 105 articles. Among these, 91 were cross-sectional studies, 5 were descriptive studies, 4 were prospective studies including 1 randomized controlled trial (RCT), and the remaining 5 used other design types.</p><p>DeepSeek can generate patient-facing materials that are both readily comprehensible and clinically accurate. This capability has been empirically validated; for example, in generating patient education materials for spinal surgeries, DeepSeek-R1 achieved the lowest Flesch-Kincaid Grade Level scores, indicating content accessible to a broader audience including those with limited health literacy [<xref ref-type="bibr" rid="ref23">23</xref>]. Similarly, in orthopedics, DeepSeek-R1 provided clearer and more easily understandable explanations of anterior cruciate ligament surgery than ChatGPT, which offered greater comprehensiveness but at a higher reading level [<xref ref-type="bibr" rid="ref30">30</xref>]. This emphasis on linguistic accessibility is critical in patient-facing materials because improved readability enhances patient engagement, reduces anxiety, and supports informed decision-making [<xref ref-type="bibr" rid="ref23">23</xref>,<xref ref-type="bibr" rid="ref34">34</xref>]. 
Furthermore, DeepSeek has performed strongly in multilingual contexts, effectively generating patient education content in both Chinese and English, which is vital for serving diverse linguistic populations [<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref35">35</xref>].</p><p>Although DeepSeek excels in readability, its responses sometimes lack comprehensive detail or sufficient citations of sources, and occasional inaccuracies or AI hallucinations have been noted [<xref ref-type="bibr" rid="ref29">29</xref>,<xref ref-type="bibr" rid="ref36">36</xref>,<xref ref-type="bibr" rid="ref37">37</xref>]. Furthermore, some studies found that DeepSeek performed similarly to, or even less accurately than, ChatGPT when generating patient education materials [<xref ref-type="bibr" rid="ref38">38</xref>,<xref ref-type="bibr" rid="ref39">39</xref>].</p></sec><sec id="s3-6-2-2"><title>DeepSeek in Clinical Decision Support and Treatment Planning</title><p>Of the 176 articles addressing DeepSeek in clinical decision support and treatment planning, 120 were cross-sectional studies, 22 were retrospective studies, 9 were prospective studies (including 2 RCTs), 2 were mixed-design studies, 14 were proof-of-concept studies, and the remaining 9 articles comprised expert consensus and other designs.</p><p>Regarding diagnostic accuracy, DeepSeek models have achieved notable results. In a dual-phase retrospective-prospective study classified as high methodological quality (n=300 liver lesions in the retrospective cohort and 126 liver lesions in the prospective cohort), DeepSeek-V3 demonstrated higher Liver Imaging Reporting and Data System (LI-RADS) classification accuracy than junior radiologists and achieved performance comparable with that of senior radiologists for hepatocellular carcinoma diagnosis [<xref ref-type="bibr" rid="ref40">40</xref>]; however, this finding awaits replication in larger, multicenter settings. 
In a moderate-quality historical control study, DeepSeek-R1 demonstrated diagnostic accuracy comparable to that of GPT-4 in complex clinicopathologic cases [<xref ref-type="bibr" rid="ref10">10</xref>]. In a low-quality cross-sectional study, Jiao et al [<xref ref-type="bibr" rid="ref11">11</xref>] found that accuracy in diagnosing corneal diseases varied significantly among LLMs (<italic>P</italic>=.001). GPT-4o achieved the highest accuracy (80%), while DeepSeek-R1 achieved only 65%; both had accuracies that were significantly lower than that of human experts (92.5%; <italic>P</italic>&#x003C;.001).</p><p>For treatment planning, DeepSeek-V3 demonstrated statistically superior accuracy compared with ChatGPT-o1 in head and neck cancer management [<xref ref-type="bibr" rid="ref41">41</xref>], and DeepSeek-R1 outperformed OpenAI o1 in diagnostic accuracy and next-step decision-making in ophthalmology [<xref ref-type="bibr" rid="ref42">42</xref>]. These models have demonstrated strengths in specialized domains, including hand fracture management [<xref ref-type="bibr" rid="ref28">28</xref>], urinary incontinence management [<xref ref-type="bibr" rid="ref43">43</xref>], and postprostatectomy urinary incontinence guidelines [<xref ref-type="bibr" rid="ref44">44</xref>], although they have limitations in complex scenarios. Notably, DeepSeek&#x2019;s clinical reasoning capabilities are enhanced through its reinforcement learning framework, which enables emergent reasoning patterns, such as self-reflection and verification [<xref ref-type="bibr" rid="ref5">5</xref>], contributing to its strong performance in clinical decision support tasks. 
However, although DeepSeek shows promising capabilities for clinical decision support, it cannot replace multidisciplinary tumor boards or human expertise, as it lacks contextual clinical judgment, physical examination capabilities, and the ability to negotiate complex trade-offs among specialists; instead, it streamlines clinical workflows by rapidly organizing patient data [<xref ref-type="bibr" rid="ref41">41</xref>]. The integration of few-shot prompting has been shown to substantially enhance DeepSeek&#x2019;s accuracy in specialized tasks, such as Coronary Artery Disease Reporting and Data System (CAD-RADS) category assignment [<xref ref-type="bibr" rid="ref42">42</xref>], suggesting that optimal prompt engineering is crucial for clinical implementation.</p><p>Overall, DeepSeek has emerged as a scalable tool to support treatment decisions, streamline workflows, and reduce diagnostic errors; however, integration requires careful validation and human oversight to mitigate risks.</p></sec><sec id="s3-6-2-3"><title>DeepSeek in Medical Education and Benchmarking</title><p>Of 109 articles addressing the applications of DeepSeek in medical education and benchmarking, 93 were cross-sectional studies, 6 were retrospective studies, 5 were perspective studies, and 5 were descriptive studies.</p><p>On the Chinese National Medical Licensing Examination, DeepSeek-R1 achieved 92% accuracy, significantly outperforming ChatGPT-4o (87.2%) and demonstrating strength on low-difficulty questions [<xref ref-type="bibr" rid="ref13">13</xref>]. Similarly, in the gastroenterology board examinations, both the base R1 model (77.1%) and search-augmented version (81.5%) surpassed the passing threshold and significantly outperformed the offline ChatGPT-3 (65.1%) and ChatGPT-4 (62.4%) models [<xref ref-type="bibr" rid="ref45">45</xref>]. 
Cross-specialty comparisons revealed consistent patterns: In basic medical sciences, DeepSeek-R1 scored 78.33% alongside ChatGPT-4, whereas in clinical sciences, it scored 87.5%, demonstrating robust knowledge integration [<xref ref-type="bibr" rid="ref46">46</xref>]. When evaluated against other reasoning-enhanced models on ophthalmology board-style questions, DeepSeek-R1 (72.5%) and its lighter variant R1-Lite (76.5%) performed competitively with OpenAI o1 Pro (83.4%), suggesting a balanced trade-off between performance and computational efficiency [<xref ref-type="bibr" rid="ref47">47</xref>]. The model also demonstrated strong anatomical knowledge, achieving 89.2% accuracy on Turkish Dental Specialty Admission Exam anatomy questions, comparable with other major models, though below ChatGPT-4o&#x2019;s 98.6% [<xref ref-type="bibr" rid="ref48">48</xref>]. These benchmark studies collectively indicate that DeepSeek provides a cost-effective, open-weight alternative for medical education, with utility in knowledge assessment and examination preparation. However, performance gaps persist in specialized domains and image-based questions, highlighting areas for future development and the continued need for human oversight in comprehensive medical education frameworks.</p></sec><sec id="s3-6-2-4"><title>DeepSeek for Clinical Workflow Optimization</title><p>A total of 63 articles described DeepSeek for clinical workflow optimization, including 26 cross-sectional studies, 2 descriptive studies, 17 retrospective studies, 4 prospective studies, 10 proof-of-concept studies, and 4 articles with other study designs.</p><p>The integration of DeepSeek models into health care systems offers significant potential to enhance operational efficiency and streamline clinical workflows, primarily by automating routine and time-consuming tasks. A prominent example is the locally deployed closed-loop system powered by DeepSeek for quality control of electronic nursing documentation. 
This system implements a comprehensive framework spanning the real-time, final, and vertical dimensions of quality assurance. The results include a dramatic reduction in documentation omission rates from 7.19% to just 1.79%; a decline in logical inconsistencies from 9.35% to 0.72%; and the complete elimination of timeliness errors, which previously stood at 8.63%. Concurrently, the quality control time per record decreased 3.2-fold, reallocating nursing efforts toward direct patient care [<xref ref-type="bibr" rid="ref6">6</xref>].</p><p>In dyslipidemia management, DeepSeek, alongside Claude-3 and GPT-4, optimized guideline-based workflows across 30 standardized cases, boosting accuracy from 72% for physicians to 91% with AI. Integration with human experts further raised simulated low-density lipoprotein cholesterol target attainment to 92%, demonstrating its utility in minimizing guideline deviations while enhancing workflow efficiency [<xref ref-type="bibr" rid="ref49">49</xref>]. However, one moderate-quality study found that DeepSeek-R1 achieved an accuracy of only 48.4% in a noncritical emergency department triage task, which is significantly lower than that of another LLM, Gemini 2.0 Flash (73.8%) [<xref ref-type="bibr" rid="ref50">50</xref>].</p><p>The large-scale deployment of DeepSeek across nearly 90 Chinese tertiary hospitals has reportedly increased patient follow-up efficiency 40-fold, marking a transformative impact on hospital administration and clinical workflow automation [<xref ref-type="bibr" rid="ref9">9</xref>]. By managing labor-intensive tasks with high consistency and speed, DeepSeek enables a paradigm shift from reactive to proactive operational governance. 
This transition enables health care professionals to focus their expertise on more complex clinical decision-making responsibilities.</p></sec></sec></sec><sec id="s3-7"><title>Medical Research and Data Analysis</title><p>Medical research and data analysis were mentioned in 73 articles. Among these, 41 had a cross-sectional design, 6 were descriptive studies, 2 were perspective studies, 9 were proof-of-concept studies, 9 were retrospective studies, 1 had a mixed design, and the remaining 5 used other design types.</p><p>DeepSeek models have demonstrated significant utility in accelerating and refining medical research and data analysis workflows. First, DeepSeek facilitates the reading of medical literature, information extraction, and screening. Several studies have developed AI-powered screening tools using DeepSeek to identify relevant studies for systematic reviews, reporting high accuracy and a significant reduction in manual workload [<xref ref-type="bibr" rid="ref51">51</xref>-<xref ref-type="bibr" rid="ref53">53</xref>]. For example, the LitAutoScreener tool, which integrates DeepSeek, achieved high accuracy and significantly improved screening efficiency, reducing the processing time to seconds per article [<xref ref-type="bibr" rid="ref51">51</xref>]. Similarly, other evaluations have confirmed that DeepSeek-based tools can reduce manual workload while maintaining high recall rates in literature screening for meta-analyses [<xref ref-type="bibr" rid="ref53">53</xref>]. In fields such as aging research, DeepSeek-R1 is part of a multi-LLM ensemble that successfully extracts protocol details from clinical trial records, doubling the yield of conventional search methods and achieving expert-level accuracy for core data points [<xref ref-type="bibr" rid="ref54">54</xref>]. Second, DeepSeek assists with generating and refining research topics and study designs. 
It helps researchers analyze cutting-edge trends, funding guidelines, and successful grant applications, thereby validating the novelty of the proposed research questions [<xref ref-type="bibr" rid="ref3">3</xref>]. For instance, DeepSeek-R1 has been used to explore novel research ideas and generate systematic review topics in fields such as oral and maxillofacial surgery [<xref ref-type="bibr" rid="ref55">55</xref>]. Similarly, in biomedical research, DeepSeek models show promise in extracting structured pre-analytical variability data from the scientific literature, facilitating standardized reporting and systematic evaluation [<xref ref-type="bibr" rid="ref56">56</xref>]. Furthermore, DeepSeek serves as a valuable tool for peer review and for critiquing research proposals. Its capacity to generate high-quality evidence-based responses enables a preliminary assessment of a proposal&#x2019;s feasibility and soundness. This function is particularly beneficial in multidisciplinary contexts where the model&#x2019;s ability to synthesize information from diverse sources significantly enhances the evaluation process [<xref ref-type="bibr" rid="ref57">57</xref>,<xref ref-type="bibr" rid="ref58">58</xref>]. Third, DeepSeek demonstrated substantial potential as an assistant for drafting, editing, and refining the content of medical research papers. Its capabilities span various domains of medical research and practice, making it a versatile tool for enhancing the quality and efficiency of academic writing. 
The model&#x2019;s proficiency at generating structured, clear, and comprehensible content is particularly valuable in medical research, where precision and clarity are paramount [<xref ref-type="bibr" rid="ref59">59</xref>].</p></sec><sec id="s3-8"><title>Other Application Domains</title><p>In other application domains, 25 articles were identified, comprising 18 cross-sectional studies, 2 perspective articles, 2 descriptive studies, and 3 proof-of-concept studies.</p><p>Beyond the primary domains discussed, DeepSeek has been explored in several niche but critical areas, including treatment outcome prediction, drug development assistance, and suicide risk prediction. Instead of reactive question-answering, DeepSeek is integrated into predictive analytics platforms. It can proactively flag at-risk patients, suggest personalized screening intervals, and predict individual responses to therapies based on electronic health records and real-time data [<xref ref-type="bibr" rid="ref60">60</xref>,<xref ref-type="bibr" rid="ref61">61</xref>]. In nasopharyngeal carcinoma, DeepSeek-V3-0324 demonstrated superior performance in treatment response evaluation compared with ChatGPT-4o-latest (96.5% vs 82.9%) and showed stronger agreement with expert annotations [<xref ref-type="bibr" rid="ref62">62</xref>].</p><p>In drug discovery, DeepSeek aids with predicting drug-drug interactions and molecular property modeling, achieving superior performance in regression and classification tasks critical to drug discovery [<xref ref-type="bibr" rid="ref63">63</xref>,<xref ref-type="bibr" rid="ref64">64</xref>].</p><p>In suicide risk prediction, the model&#x2019;s chain-of-thought reasoning enabled analysis of factors associated with correct predictions, such as substance abuse and age-related comorbidities. 
This application underscores DeepSeek&#x2019;s potential for mental health risk assessment, though further validation is needed [<xref ref-type="bibr" rid="ref65">65</xref>].</p></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Main Findings</title><p>This integrated bibliometric and scoping review provided a comprehensive early-stage mapping of the rapidly evolving research landscape concerning DeepSeek&#x2019;s applications in medicine. This field is characterized by explosive growth, global engagement, and exploration across a remarkably diverse spectrum of clinical and operational domains. The findings collectively underscore DeepSeek&#x2019;s emergence not merely as another LLM but as a potent, open-weight contender with specific capabilities that address critical needs in modern health care, including cost-effectiveness, linguistic accessibility, and scalability.</p><p>Bibliometric data showed a research frontier that has been intensively explored. The increased publication output regarding applications of DeepSeek in medicine is clear. Our results align with those of an analysis of the global research profile of another LLM, ChatGPT, conducted by Alessandri-Bonetti et al [<xref ref-type="bibr" rid="ref66">66</xref>], who revealed explosive growth in publications during the first 7 months after its release. This pattern is also consistent with a broader LLM systematic review by Chen et al [<xref ref-type="bibr" rid="ref67">67</xref>], which reported that, between January 2022 and September 2025, approximately 3.2 clinical LLM studies were published per day, with a linear increase of 7.04 studies per month following the release of ChatGPT. 
Notably, DeepSeek was not included in the analysis by Chen et al [<xref ref-type="bibr" rid="ref67">67</xref>], underscoring the gap and the need for our focused review.</p><p>The geographical and institutional productivity led by China, followed by Turkey and the United States, reflects widespread international interest in DeepSeek&#x2019;s potential, with major academic medical centers driving early investigations. Papers on DeepSeek&#x2019;s applications in medicine have been published in various journals, ranging from well-known open-access journals such as <italic>Cureus</italic> and <italic>Scientific Reports</italic> to professional medical informatics and medical education journals such as the <italic>Journal of Medical Internet Research</italic>. This publication pattern indicates that the research reaches both broad scientific and specialized clinical audiences. Keyword co-occurrence analysis effectively identified the core themes of this research trend. The temporal overlay, which revealed a shift from foundational medical education topics toward more specialized areas such as &#x201C;retrieval-augmented generation&#x201D; and &#x201C;oncology,&#x201D; illustrates the field&#x2019;s rapid maturation and deepening focus. Synthesizing the scoping review findings, DeepSeek as a medical tool initially gained attention for its strength in democratizing medical information. For instance, in patient education, it can generate outputs with higher readability than its counterparts, such as ChatGPT.</p><p>Perhaps the most striking finding is that DeepSeek has demonstrated competitive and sometimes superior performance compared with existing proprietary models in clinical decision support tasks. 
The bibliometric analysis revealed that &#x201C;clinical decision support&#x201D; formed the largest cluster, while the scoping review further indicated that these studies primarily focused on three specific tasks: &#x201C;aiding diagnosis,&#x201D; &#x201C;differential diagnosis,&#x201D; and &#x201C;treatment plan formulation.&#x201D; The evidence that DeepSeek-V3 can match senior radiologists at specialized diagnostic classifications or that DeepSeek-R1 rivals GPT-4 and OpenAI o1 in diagnostic accuracy across ophthalmology and complex clinicopathological cases challenges the assumption that superior capability is the exclusive domain of closed, commercial models. This &#x201C;performance parity&#x201D; achieved through an open-weight architecture has profound implications. Specifically, it suggests a pathway toward breaking the monopoly of advanced AI in clinical support, potentially fostering innovation, reducing costs, and allowing for better adaptation to local health care contexts and linguistic needs.</p><p>The utility of this model in medical education and benchmarking further supports its position as a disruptive and cost-effective tool [<xref ref-type="bibr" rid="ref68">68</xref>]. For institutions and learners worldwide, particularly in resource-constrained settings, DeepSeek offers a viable, high-quality alternative for exam preparation, simulation, and curriculum development, potentially lowering the barriers to accessing advanced medical training aids.</p><p>Beyond its direct clinical and educational applications, this review highlighted DeepSeek&#x2019;s transformative potential across broader health care operations. Documented case studies have demonstrated reductions in documentation error rates in nursing and lower specimen return rates in gynecological examinations and enabled large-scale patient follow-up. 
By automating a vast array of low-complexity tasks, DeepSeek can free human resources to provide higher quality care and reduce systemic inefficiencies across the health care continuum.</p><p>Of the 353 papers included in this scoping review, only 6.8% (24/353) met the criteria for high quality, whereas the majority (235/353, 66.6%) were classified as low quality, consisting predominantly of unvalidated benchmarking using examination questions, single-center convenience samples, and proof-of-concept studies. This distribution reflects a critical gap in the current literature: The rapid proliferation of DeepSeek in medicine has been accompanied by an abundance of exploratory studies with limited external validity. Although such benchmarking studies offer valuable insights into the model&#x2019;s technical capabilities and serve as initial performance indicators, they do not directly inform real-world diagnostic accuracy, patient safety, or clinical utility [<xref ref-type="bibr" rid="ref69">69</xref>].</p><p>In head-to-head comparisons with other LLMs, DeepSeek demonstrated predominantly favorable or comparable performance: Positive outcomes (126/283, 44.5%) were more frequent than negative ones (73/283, 25.8%), and a substantial proportion of studies (84/283, 29.7%) showed no clear superiority of either model. However, because these results derived predominantly from low-quality (235/353, 66.6%) or moderate-quality evidence, with only 6.8% (24/353) meeting high methodological standards, performance claims should be considered preliminary and hypothesis-generating rather than definitive. 
Clinically, it excels in open-source accessibility, low cost, readability, Chinese language proficiency, and structured reasoning; nonetheless, limitations, including occasional inaccuracies, lower reliability in certain tasks, and the absence of prospective clinical trials, necessitate continued validation and human oversight.</p></sec><sec id="s4-2"><title>Comparison With Prior Reviews on Other LLMs in Medicine</title><p>To contextualize the novel and distinct contributions of our work, we compared this review with existing reviews of other LLMs in medicine, such as ChatGPT, GPT-4, LLaMA, and Gemini. Several prior reviews have documented the rapid adoption of proprietary LLMs in health care, highlighting their utility in clinical reasoning, medical education, and patient communication [<xref ref-type="bibr" rid="ref66">66</xref>,<xref ref-type="bibr" rid="ref70">70</xref>,<xref ref-type="bibr" rid="ref71">71</xref>]. However, most existing reviews have primarily focused on closed-source models, which are characterized by limited transparency, restricted capacity for local deployment, and substantial cost barriers. These limitations hinder their scalability and reduce their adaptability across diverse institutional settings. In contrast, this review specifically focused on DeepSeek, an open-weight LLM, and identified several distinctive features that differentiate it from the patterns reported in previous LLM reviews.</p><p>First, methodologically, we combined bibliometric analysis with a scoping review to provide both quantitative mapping of research trends and qualitative synthesis of applications and challenges of DeepSeek in medicine, a dual approach rarely applied in prior LLM reviews, which have tended to rely on either bibliometric or narrative synthesis alone [<xref ref-type="bibr" rid="ref66">66</xref>,<xref ref-type="bibr" rid="ref71">71</xref>].</p><p>Second, geographically, the research landscapes differ substantially. 
For ChatGPT, early publications were predominantly led by institutions in the United States and Europe, with a wide distribution across high-income countries [<xref ref-type="bibr" rid="ref66">66</xref>,<xref ref-type="bibr" rid="ref70">70</xref>]. In contrast, our analysis identified China as the dominant contributor to DeepSeek medical research (163 papers), followed by Turkey and the United States. This pattern aligns with DeepSeek&#x2019;s country of origin and its rapid deployment across Chinese tertiary hospitals [<xref ref-type="bibr" rid="ref2">2</xref>,<xref ref-type="bibr" rid="ref9">9</xref>]. Notably, the early and substantial involvement of Turkish researchers (52 papers) in DeepSeek research is a distinctive feature not observed in early ChatGPT literature.</p><p>Third, previous reviews focused predominantly on proprietary models such as ChatGPT, GPT-4, LLaMA, and Gemini. In contrast, our study addressed a significant gap by examining an open-source alternative with distinct architectural advantages and greater deployment flexibility. In terms of real-world deployment, the rollout of DeepSeek across nearly 90 tertiary hospitals in China has resulted in measurable improvements in workflow efficiency and documentation quality. This scale of implementation has not been reported in similar reviews of other LLMs, which have largely focused on simulated or benchmarking studies [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref9">9</xref>]. In terms of application areas, prior work on ChatGPT and other proprietary LLMs identified medical education, clinical decision support, and patient communication as core areas [<xref ref-type="bibr" rid="ref70">70</xref>,<xref ref-type="bibr" rid="ref71">71</xref>]. Our keyword co-occurrence analysis confirmed that these are also central themes for DeepSeek. 
However, DeepSeek&#x2019;s open-weight architecture introduces distinctive features not emphasized in proprietary LLM reviews: on-premises deployability, data privacy, cost-effectiveness, and superior performance in Chinese-language medical tasks. These features represent unique contributions of DeepSeek to the medical LLM landscape and are not simply typical of any newly introduced LLM. Regarding performance and utility, our findings demonstrated that DeepSeek achieved competitive or superior performance compared with proprietary models in clinical diagnostics, medical licensing examinations, and patient education while substantially reducing costs, advantages that prior reviews have identified as critical unmet needs in AI integration [<xref ref-type="bibr" rid="ref71">71</xref>,<xref ref-type="bibr" rid="ref72">72</xref>].</p></sec><sec id="s4-3"><title>Challenges in the Applications of DeepSeek in Medicine</title><p>Guided by an ethical framework, the efficacy and safety of any medical intervention must be carefully calibrated in modern medical practice [<xref ref-type="bibr" rid="ref73">73</xref>]. As aforementioned, DeepSeek demonstrates significant potential for enhancing medical workflows, medical education, and research. 
However, its application faces numerous challenges in terms of effectiveness and safety, including accuracy issues, data privacy concerns, ethical uncertainties, and diverse global regulations governing AI.</p></sec><sec id="s4-4"><title>Accuracy and Variable Performance Across Medical Domains and Specialties</title><p>Although DeepSeek has demonstrated diagnostic accuracy comparable to that of specialist clinicians and proprietary models in certain areas [<xref ref-type="bibr" rid="ref40">40</xref>,<xref ref-type="bibr" rid="ref74">74</xref>-<xref ref-type="bibr" rid="ref76">76</xref>], its overall efficacy remains inconsistent [<xref ref-type="bibr" rid="ref42">42</xref>,<xref ref-type="bibr" rid="ref77">77</xref>,<xref ref-type="bibr" rid="ref78">78</xref>]. The model exhibits strong zero-shot and few-shot learning capabilities in general tasks; however, the rapid evolution of medical knowledge necessitates continuous pretraining on extensive volumes of high-quality, domain-specific data. In data-scarce specialties, particularly those lacking sufficient fine-tuning datasets, DeepSeek often fails to effectively acquire new features and patterns, leading to model hallucinations, defined as the generation of seemingly plausible but factually incorrect or unsupported information [<xref ref-type="bibr" rid="ref36">36</xref>,<xref ref-type="bibr" rid="ref79">79</xref>]. Such limitations are particularly severe in domains involving rare diseases and complex, nonclassical clinical scenarios, where available pretraining data are often insufficient and clinically unvalidated [<xref ref-type="bibr" rid="ref36">36</xref>,<xref ref-type="bibr" rid="ref37">37</xref>,<xref ref-type="bibr" rid="ref80">80</xref>,<xref ref-type="bibr" rid="ref81">81</xref>]. 
Furthermore, as a fundamentally text-based model, DeepSeek exhibits inherent limitations in processing specialized nontextual medical data, such as medical images, complex laboratory metrics, and genomic data [<xref ref-type="bibr" rid="ref74">74</xref>,<xref ref-type="bibr" rid="ref82">82</xref>-<xref ref-type="bibr" rid="ref84">84</xref>]. These constraints collectively contribute to inconsistent model performance across specific medical domains and hinder its generalization.</p></sec><sec id="s4-5"><title>Ethical and Safety Risks</title><p>The integration of DeepSeek into medical practice raises ethical challenges that implicate all 4 foundational principles of biomedical ethics, namely autonomy, nonmaleficence, beneficence, and justice, which were originally proposed by Beauchamp and Childress in 1979 [<xref ref-type="bibr" rid="ref85">85</xref>].</p><sec id="s4-5-1"><title>Autonomy: Challenges to Patient Self-Determination and Informed Consent</title><p>The application of DeepSeek in medicine may undermine the principle of autonomy in medical ethics. As an open-source model, DeepSeek can be deployed on-premises in a hospital environment, which facilitates compliance with data privacy requirements [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref81">81</xref>]. However, its broader adoption is complicated by varying regulatory frameworks across regions, such as the General Data Protection Regulation (GDPR) and the Health Insurance Portability and Accountability Act (HIPAA) [<xref ref-type="bibr" rid="ref3">3</xref>]. The Italian data protection authority, for instance, has restricted DeepSeek over concerns that its data handling methods fail to meet the strict privacy rules of the European Union [<xref ref-type="bibr" rid="ref81">81</xref>]. 
Although techniques such as chain-of-thought have enhanced the interpretability of decision-making, the model&#x2019;s fundamental &#x201C;black-box&#x201D; nature persists, posing practical challenges to informed consent in clinical applications [<xref ref-type="bibr" rid="ref60">60</xref>,<xref ref-type="bibr" rid="ref86">86</xref>-<xref ref-type="bibr" rid="ref90">90</xref>].</p></sec><sec id="s4-5-2"><title>Nonmaleficence: Risks of Novel and Amplified Harms</title><p>The rapid, cost-effective integration of DeepSeek in Chinese hospitals underscores a central paradox in medicine: how to seize the opportunity for transformative innovation while mitigating the risks of undue haste and still upholding the principle of &#x201C;first, do no harm&#x201D; [<xref ref-type="bibr" rid="ref2">2</xref>]. However, this model may provide overly definitive recommendations, potentially suggesting unnecessary tests or harmful treatments without adequate contextual warnings [<xref ref-type="bibr" rid="ref91">91</xref>,<xref ref-type="bibr" rid="ref92">92</xref>]. If clinicians over-rely on AI outputs, effectively delegating core cognitive tasks such as comprehensive analysis, differential diagnosis, and clinical judgment to the machine, this may erode their clinical skills and independent clinical reasoning. Furthermore, however data-driven its suggestions may be, DeepSeek may lack the nuanced and holistic understanding of a patient&#x2019;s psychosocial context that an experienced physician integrates. Collectively, these issues challenge the ethical principle of nonmaleficence.</p></sec><sec id="s4-5-3"><title>Beneficence: The Challenge of Defining and Delivering &#x201C;Good&#x201D;</title><p>The principle of beneficence obligates health care providers to act in ways that promote patients&#x2019; well-being and enhance clinical outcomes [<xref ref-type="bibr" rid="ref93">93</xref>]. 
However, an emphasis on AI-driven efficiency may unintentionally marginalize the irreplaceable human dimensions of medicine, such as empathy, compassion, and the therapeutic physician-patient relationship. Although systems like DeepSeek are adept at optimizing measurable, data-informed endpoints, the concept of &#x201C;good&#x201D; in medical practice encompasses psychosocial, spiritual, and qualitative aspects of care that resist easy quantification [<xref ref-type="bibr" rid="ref89">89</xref>,<xref ref-type="bibr" rid="ref94">94</xref>]. Overreliance on algorithmic pathways designed to maximize metrics neglects the holistic components of beneficence [<xref ref-type="bibr" rid="ref95">95</xref>]. Consequently, the physician&#x2019;s role as a compassionate interpreter of illness, which lies at the heart of medical beneficence, may be subordinated to the pursuit of algorithmic efficiency.</p></sec><sec id="s4-5-4"><title>Justice: Amplifying Inequities in Algorithmic Health Care</title><p>The principle of justice concerns fair and equitable distribution of health care benefits and burdens. Despite the use of data preprocessing techniques and fairness-aware algorithms, DeepSeek can still perpetuate and potentially amplify societal or health care biases present in its historical medical training data, including the underdiagnosis of certain conditions within specific demographic groups, thereby harming marginalized populations [<xref ref-type="bibr" rid="ref80">80</xref>,<xref ref-type="bibr" rid="ref88">88</xref>,<xref ref-type="bibr" rid="ref96">96</xref>]. Furthermore, because DeepSeek&#x2019;s training framework is primarily optimized for English and Chinese, it carries inherent lexical and cultural biases that may limit its applicability to global health care contexts [<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref35">35</xref>,<xref ref-type="bibr" rid="ref97">97</xref>]. 
Additionally, the benefits of advanced AI, such as DeepSeek, are likely to accrue disproportionately to well-resourced tertiary-care urban hospitals equipped with the necessary infrastructure and specialized personnel for local deployment. Such unequal access exacerbates existing health disparities across regions and socioeconomic groups.</p></sec></sec><sec id="s4-6"><title>Other Challenges</title><p>In addition to challenges such as accuracy, variable performance across medical domains and specialties, and medical ethics and safety issues, the application of DeepSeek in medicine faces other obstacles, including the redesign of clinical workflows, delineation of liability, regulatory lag, and trust and adoption. The deployment of DeepSeek challenges some clinicians&#x2019; work habits and creates a demand for professionals who understand both clinical practice and AI. A shortage of talent limits its wider adoption. When errors in DeepSeek-assisted decision-making lead to medical incidents, how should legal responsibility be defined? Should it fall on the operating physician, the hospital that adopted the AI, or the model developers? Currently, global regulations in this field generally lag, and this uncertainty greatly dampens hospitals&#x2019; willingness to implement such technologies. Trust remains another challenge; although DeepSeek is easy to use, concerns about risks affect its acceptance [<xref ref-type="bibr" rid="ref87">87</xref>].</p></sec><sec id="s4-7"><title>Future Work in the Applications of DeepSeek in Medicine</title><p>Based on the aforementioned challenges, future research and development should prioritize the directions highlighted in the following sections to advance the reliable, ethical, and equitable integration of DeepSeek into medical practice.</p><sec id="s4-7-1"><title>From Benchmarking to Clinical Validation: Prospective and Pragmatic Studies</title><p>The current evidence base is dominated by low-quality, simulation-based studies. 
Future work should move beyond examination-style benchmarks and retrospective analyses toward prospective, multicenter, and pragmatic clinical trials. Specifically, RCTs are urgently needed to compare DeepSeek-assisted care against standard practice using both proximal performance metrics, such as diagnostic accuracy, and patient-relevant outcomes, including treatment adherence, adverse events, and quality of life [<xref ref-type="bibr" rid="ref98">98</xref>,<xref ref-type="bibr" rid="ref99">99</xref>]. Such trials should also evaluate human-AI interaction models, for example, human-in-the-loop versus fully automated approaches, to determine the optimal balance between efficiency and safety [<xref ref-type="bibr" rid="ref100">100</xref>,<xref ref-type="bibr" rid="ref101">101</xref>]. Furthermore, real-world implementation science frameworks should be applied to assess scalability, usability, and unintended consequences across diverse health care settings.</p></sec><sec id="s4-7-2"><title>Strengthening Governance, Explainability, and Safety</title><p>To address ethical and regulatory gaps, future work should co-develop clinically interpretable explainability methods tailored to DeepSeek&#x2019;s reasoning architecture. Techniques such as structured audit trails, uncertainty quantification, and natural language rationales can support informed consent and clinician oversight [<xref ref-type="bibr" rid="ref89">89</xref>,<xref ref-type="bibr" rid="ref102">102</xref>]. On the governance front, clear liability and accountability frameworks are required to delineate responsibilities among developers, health care institutions, and clinicians when AI-assisted errors occur [<xref ref-type="bibr" rid="ref88">88</xref>,<xref ref-type="bibr" rid="ref96">96</xref>]. 
Additionally, the &#x201C;human-in-command&#x201D; principle, which mandates that DeepSeek&#x2019;s recommendations serve as decision support rather than replacement for clinician judgment, should be embedded into clinical workflows and professional guidelines [<xref ref-type="bibr" rid="ref98">98</xref>,<xref ref-type="bibr" rid="ref103">103</xref>]. As articulated in the concept of AI-assisted medicine introduced by Wang et al [<xref ref-type="bibr" rid="ref104">104</xref>], a discipline that uses AI technologies to assist with disease research, prevention, diagnosis, and treatment as well as to promote health maintenance, clinicians must retain ultimate decision-making authority and accountability [<xref ref-type="bibr" rid="ref100">100</xref>,<xref ref-type="bibr" rid="ref101">101</xref>]. This conceptual foundation reinforces that AI remains a tool to augment, not supplant, human expertise.</p></sec><sec id="s4-7-3"><title>Mitigating Bias and Promoting Equitable Access</title><p>Despite DeepSeek&#x2019;s open-weight advantage, bias and inequity remain critical challenges. Future research should conduct systematic bias audits across demographic subgroups such as sex, socioeconomic status, and ethnicity using multi-institutional and multilingual datasets [<xref ref-type="bibr" rid="ref105">105</xref>,<xref ref-type="bibr" rid="ref106">106</xref>]. To avoid perpetuating health care disparities, developers should expand medically validated support beyond English and Chinese to other major world languages while adapting outputs to local clinical guidelines and cultural contexts [<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref107">107</xref>].</p></sec><sec id="s4-7-4"><title>Redefining Medical Education and Workforce Development</title><p>The rapid adoption of DeepSeek demands a parallel evolution in medical curricula. 
Future educational interventions should cultivate &#x201C;AI literacy&#x201D;: the ability to critically appraise AI-generated recommendations; recognize hallucinations and bias; and integrate AI outputs with compassionate, patient-centered communication [<xref ref-type="bibr" rid="ref98">98</xref>,<xref ref-type="bibr" rid="ref108">108</xref>]. Institutions should develop interdisciplinary training programs that bridge clinical practice and data science to build a workforce capable of deploying, auditing, and improving medical AI systems. Finally, professional societies should establish certification and continuing education standards for AI-augmented clinical practice.</p></sec><sec id="s4-7-5"><title>Unexplored Domains and Long-Term Monitoring</title><p>Most current research focuses on diagnosis, medical education, and workflow efficiency, leaving prevention and long-term care underexplored. Future investigations should prioritize disease prevention, population health management, and long-term care [<xref ref-type="bibr" rid="ref103">103</xref>,<xref ref-type="bibr" rid="ref109">109</xref>]. Additionally, postdeployment surveillance systems should be established to monitor real-world performance, detect emergent harms, and enable continuous model improvement, closing the loop from evidence generation to sustained safe implementation [<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref90">90</xref>].</p></sec></sec><sec id="s4-8"><title>Limitations of the Study</title><p>Several limitations of this study should be considered when interpreting the findings. First, the review covered literature published over a relatively short and recent timeframe. Consequently, the observed surge in publications may reflect early enthusiasm rather than sustained scientific progress. Second, although a language-agnostic search strategy was used, most included studies were published in English, with only a small number (n=13) in Chinese. 
This linguistic imbalance, coupled with the predominance of contributions from researchers based in China, indicates a notable geographical concentration of the available evidence. As a result, the findings may not be directly generalizable to health care systems operating within different regulatory, cultural, or infrastructural contexts. Third, the included studies exhibited substantial heterogeneity in methodologies, medical specialties, evaluation metrics, comparator models, and DeepSeek model versions&#x2014;for example, R1 versus V3, which differ in parameter counts, training data, and reasoning depth. This variability precluded quantitative synthesis of outcomes and hindered direct cross-study comparisons. Although we reported version-specific findings where available, direct comparisons of performance should be interpreted with caution. Future research should adopt standardized version reporting and benchmark against fixed model checkpoints to enhance comparability and reproducibility. Finally, much of the evidence is derived from benchmarking studies, simulated cases, or retrospective analyses, with a formal quality appraisal showing that 66.6% (235/353) of included original articles were of low quality and only 6.8% (24/353) met the criteria to be considered high quality. Prospective clinical trials or RCTs assessing DeepSeek&#x2019;s impact on tangible patient health outcomes in real-world clinical settings remain notably scarce. Consequently, the overall quality of the evidence base is inherently preliminary, and the reviewed corpus carries a high risk of bias. The reported strengths of DeepSeek should be interpreted with caution, as these findings predominantly derive from low-quality, controlled, nongeneralizable settings.</p></sec><sec id="s4-9"><title>Conclusion</title><p>This integrated bibliometric and scoping review synthesized the available evidence on DeepSeek&#x2019;s applications in medicine. 
The bibliometric analysis revealed a progressive increase in publication output from January 2025 through November 2025, with China, Turkey, and the United States as the leading contributors. Keyword co-occurrence analysis formed 7 clusters; the 3 most frequent keywords were &#x201C;large language model,&#x201D; &#x201C;artificial intelligence,&#x201D; and &#x201C;patient education.&#x201D;</p><p>The scoping review found that DeepSeek has been evaluated across 5 primary application domains: patient education and communication, clinical decision support and treatment planning, medical education and benchmarking, clinical workflow optimization, and medical research and data analysis. In these domains, DeepSeek demonstrated variable but often competitive performance compared with proprietary models, with documented strengths in readability of patient education materials, diagnostic accuracy in select specialties, cost-efficiency, and local deployability. Nevertheless, it should be noted that most included studies were of moderate or low quality, and the evidence base is predominantly composed of benchmarking and simulation studies, with a notable scarcity of prospective clinical trials or RCTs assessing patient-relevant outcomes. Additionally, the review identified consistent limitations, including variable performance across medical specialties, model hallucinations, ethical concerns, data privacy challenges, and regulatory gaps. 
Future integration will require robust prospective clinical validation, expansion of multimodal capabilities, bias mitigation strategies, human-in-the-loop governance frameworks, and equitable access strategies.</p></sec></sec></body><back><ack><p>The authors would like to acknowledge Editage for English language editing [<xref ref-type="bibr" rid="ref110">110</xref>].</p></ack><notes><sec><title>Funding</title><p>This work was supported by the Science and Technology Project of Jinan Health Commission (grant 2020-3-02).</p></sec><sec><title>Data Availability</title><p>All data generated or analyzed during this study are included in this published article and its multimedia appendices.</p></sec></notes><fn-group><fn fn-type="con"><p>Conceptualization: GW</p><p>Data curation: HZ, DW</p><p>Formal analysis: YX, SH</p><p>Funding acquisition: GW</p><p>Methodology: HZ, DW</p><p>Resources: HZ, DW, YX, SH</p><p>Software: HZ, DW, GW</p><p>Supervision: GW</p><p>Visualization: HZ, SH</p><p>Writing &#x2013; original draft: HZ, DW, YX, SH</p><p>Writing &#x2013; review &#x0026; editing: GW</p><p>All the authors read and approved the final manuscript.</p></fn><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">AI</term><def><p>artificial intelligence</p></def></def-item><def-item><term id="abb2">APY</term><def><p>average publication year</p></def></def-item><def-item><term id="abb3">BIBLIO</term><def><p>bibliometric reviews of biomedical literature</p></def></def-item><def-item><term id="abb4">CAD-RADS</term><def><p>Coronary Artery Disease Reporting and Data System</p></def></def-item><def-item><term id="abb5">GDPR</term><def><p>General Data Protection Regulation</p></def></def-item><def-item><term id="abb6">GRPO</term><def><p>Group Relative Policy Optimization</p></def></def-item><def-item><term id="abb7">HIPAA</term><def><p>Health Insurance Portability and Accountability 
Act</p></def></def-item><def-item><term id="abb8">LI-RADS</term><def><p>Liver Imaging Reporting and Data System</p></def></def-item><def-item><term id="abb9">LLM</term><def><p>large language model</p></def></def-item><def-item><term id="abb10">MeSH</term><def><p>medical subject headings</p></def></def-item><def-item><term id="abb11">PRISMA-ScR</term><def><p>Preferred Reporting Items for Systematic Reviews and Meta-Analyses Extension for Scoping Reviews</p></def></def-item><def-item><term id="abb12">RCT</term><def><p>randomized controlled trial</p></def></def-item><def-item><term id="abb13">WoSCC</term><def><p>Web of Science Core Collection</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sandmann</surname><given-names>S</given-names> </name><name name-style="western"><surname>Hegselmann</surname><given-names>S</given-names> </name><name name-style="western"><surname>Fujarski</surname><given-names>M</given-names> </name><etal/></person-group><article-title>Benchmark evaluation of DeepSeek large language models in clinical decision-making</article-title><source>Nat Med</source><year>2025</year><month>08</month><volume>31</volume><issue>8</issue><fpage>2546</fpage><lpage>2549</lpage><pub-id pub-id-type="doi">10.1038/s41591-025-03727-2</pub-id><pub-id pub-id-type="medline">40267970</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zeng</surname><given-names>D</given-names> </name><name name-style="western"><surname>Qin</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Sheng</surname><given-names>B</given-names> </name><name name-style="western"><surname>Wong</surname><given-names>TY</given-names> 
</name></person-group><article-title>DeepSeek&#x2019;s &#x201C;low-cost&#x201D; adoption across China&#x2019;s hospital systems: too fast, too soon?</article-title><source>JAMA</source><year>2025</year><month>06</month><day>3</day><volume>333</volume><issue>21</issue><fpage>1866</fpage><lpage>1869</lpage><pub-id pub-id-type="doi">10.1001/jama.2025.6571</pub-id><pub-id pub-id-type="medline">40293869</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>MohanaSundaram</surname><given-names>A</given-names> </name><name name-style="western"><surname>Sathanantham</surname><given-names>ST</given-names> </name><name name-style="western"><surname>Ivanov</surname><given-names>A</given-names> </name><name name-style="western"><surname>Mofatteh</surname><given-names>M</given-names> </name></person-group><article-title>DeepSeek&#x2019;s readiness for medical research and practice: prospects, bottlenecks, and global regulatory constraints</article-title><source>Ann Biomed Eng</source><year>2025</year><month>07</month><volume>53</volume><issue>7</issue><fpage>1754</fpage><lpage>1756</lpage><pub-id pub-id-type="doi">10.1007/s10439-025-03738-7</pub-id><pub-id pub-id-type="medline">40272697</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Jin</surname><given-names>I</given-names> </name><name name-style="western"><surname>Tangsrivimol</surname><given-names>JA</given-names> </name><name name-style="western"><surname>Darzi</surname><given-names>E</given-names> </name><etal/></person-group><article-title>DeepSeek vs. 
ChatGPT: prospects and challenges</article-title><source>Front Artif Intell</source><year>2025</year><volume>8</volume><fpage>1576992</fpage><pub-id pub-id-type="doi">10.3389/frai.2025.1576992</pub-id><pub-id pub-id-type="medline">40612384</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Guo</surname><given-names>D</given-names> </name><name name-style="western"><surname>Yang</surname><given-names>D</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>H</given-names> </name><etal/></person-group><article-title>DeepSeek-R1 incentivizes reasoning in LLMs through reinforcement learning</article-title><source>Nature</source><year>2025</year><month>09</month><volume>645</volume><issue>8081</issue><fpage>633</fpage><lpage>638</lpage><pub-id pub-id-type="doi">10.1038/s41586-025-09422-z</pub-id><pub-id pub-id-type="medline">40962978</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lv</surname><given-names>J</given-names> </name><name name-style="western"><surname>Xu</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Jiang</surname><given-names>M</given-names> </name><etal/></person-group><article-title>A DeepSeek-powered locally deployed closed-loop system for enhancing quality control in electronic nursing documentation: development and clinical validation</article-title><source>J Am Med Inform Assoc</source><year>2025</year><month>10</month><day>1</day><volume>32</volume><issue>10</issue><fpage>1526</fpage><lpage>1532</lpage><pub-id pub-id-type="doi">10.1093/jamia/ocaf109</pub-id><pub-id pub-id-type="medline">40668938</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group 
person-group-type="author"><name name-style="western"><surname>Wang</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Tan</surname><given-names>W</given-names> </name><name name-style="western"><surname>Cheng</surname><given-names>S</given-names> </name><etal/></person-group><article-title>Large language model agent for managing patients with suspected hypertension</article-title><source>Hypertension</source><year>2026</year><month>01</month><volume>83</volume><issue>1</issue><fpage>212</fpage><lpage>224</lpage><pub-id pub-id-type="doi">10.1161/HYPERTENSIONAHA.125.25305</pub-id><pub-id pub-id-type="medline">41064862</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Miao</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Wen</surname><given-names>J</given-names> </name><name name-style="western"><surname>Luo</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Li</surname><given-names>J</given-names> </name></person-group><article-title>MedARC: Adaptive multi-agent refinement and collaboration for enhanced medical reasoning in large language models</article-title><source>Int J Med Inform</source><year>2026</year><month>02</month><volume>206</volume><fpage>106136</fpage><pub-id pub-id-type="doi">10.1016/j.ijmedinf.2025.106136</pub-id><pub-id pub-id-type="medline">41109093</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Chen</surname><given-names>J</given-names> </name><name name-style="western"><surname>Miao</surname><given-names>C</given-names> </name></person-group><article-title>DeepSeek deployed in 90 Chinese tertiary hospitals: how artificial intelligence is transforming clinical practice</article-title><source>J Med 
Syst</source><year>2025</year><month>04</month><day>24</day><volume>49</volume><issue>1</issue><fpage>53</fpage><pub-id pub-id-type="doi">10.1007/s10916-025-02181-4</pub-id><pub-id pub-id-type="medline">40272650</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Chan</surname><given-names>L</given-names> </name><name name-style="western"><surname>Xu</surname><given-names>X</given-names> </name><name name-style="western"><surname>Lv</surname><given-names>K</given-names> </name></person-group><article-title>DeepSeek-R1 and GPT-4 are comparable in a complex diagnostic challenge: a historical control study</article-title><source>Int J Surg</source><year>2025</year><volume>111</volume><issue>6</issue><fpage>4056</fpage><lpage>4059</lpage><pub-id pub-id-type="doi">10.1097/JS9.0000000000002386</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Jiao</surname><given-names>C</given-names> </name><name name-style="western"><surname>Rosas</surname><given-names>E</given-names> </name><name name-style="western"><surname>Asadigandomani</surname><given-names>H</given-names> </name><etal/></person-group><article-title>Diagnostic performance of publicly available large language models in corneal diseases: a comparison with human specialists</article-title><source>Diagnostics (Basel)</source><year>2025</year><month>05</month><day>13</day><volume>15</volume><issue>10</issue><fpage>1221</fpage><pub-id pub-id-type="doi">10.3390/diagnostics15101221</pub-id><pub-id pub-id-type="medline">40428214</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Luo</surname><given-names>PW</given-names> </name><name 
name-style="western"><surname>Liu</surname><given-names>JW</given-names> </name><name name-style="western"><surname>Xie</surname><given-names>X</given-names> </name><etal/></person-group><article-title>DeepSeek vs ChatGPT: a comparison study of their performance in answering prostate cancer radiotherapy questions in multiple languages</article-title><source>Am J Clin Exp Urol</source><year>2025</year><volume>13</volume><issue>2</issue><fpage>176</fpage><lpage>185</lpage><pub-id pub-id-type="doi">10.62347/UIAP7979</pub-id><pub-id pub-id-type="medline">40400997</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wu</surname><given-names>J</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Qin</surname><given-names>Y</given-names> </name></person-group><article-title>Performance of DeepSeek-R1 and ChatGPT-4o on the Chinese National Medical Licensing Examination: a comparative study</article-title><source>J Med Syst</source><year>2025</year><month>06</month><day>3</day><volume>49</volume><issue>1</issue><fpage>74</fpage><pub-id pub-id-type="doi">10.1007/s10916-025-02213-z</pub-id><pub-id pub-id-type="medline">40459679</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Huang</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Wan</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>J</given-names> </name><name name-style="western"><surname>Qin</surname><given-names>M</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>J</given-names> </name><name name-style="western"><surname>Liang</surname><given-names>H</given-names> 
</name></person-group><article-title>Knowledge mapping of biomarkers in amyotrophic lateral sclerosis: a comprehensive bibliometric and visual analysis</article-title><source>Neurodegener Dis Manag</source><year>2026</year><month>04</month><volume>16</volume><issue>2</issue><fpage>191</fpage><lpage>207</lpage><pub-id pub-id-type="doi">10.1080/17582024.2025.2554525</pub-id><pub-id pub-id-type="medline">40905501</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Chen</surname><given-names>X</given-names> </name><name name-style="western"><surname>Yang</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Yun</surname><given-names>D</given-names> </name><etal/></person-group><article-title>Current status and solutions for AI ethics in ophthalmology: a bibliometric analysis</article-title><source>NPJ Digit Med</source><year>2025</year><month>10</month><day>2</day><volume>8</volume><issue>1</issue><fpage>594</fpage><pub-id pub-id-type="doi">10.1038/s41746-025-01976-6</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Levac</surname><given-names>D</given-names> </name><name name-style="western"><surname>Colquhoun</surname><given-names>H</given-names> </name><name name-style="western"><surname>O&#x2019;Brien</surname><given-names>KK</given-names> </name></person-group><article-title>Scoping studies: advancing the methodology</article-title><source>Implement Sci</source><year>2010</year><month>09</month><day>20</day><volume>5</volume><fpage>69</fpage><pub-id pub-id-type="doi">10.1186/1748-5908-5-69</pub-id><pub-id pub-id-type="medline">20854677</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Cobo</surname><given-names>MJ</given-names> </name><name name-style="western"><surname>L&#x00F3;pez-Herrera</surname><given-names>AG</given-names> </name><name name-style="western"><surname>Herrera-Viedma</surname><given-names>E</given-names> </name><name name-style="western"><surname>Herrera</surname><given-names>F</given-names> </name></person-group><article-title>Science mapping software tools: review, analysis, and cooperative study among tools</article-title><source>J Am Soc Inf Sci Technol</source><year>2011</year><month>07</month><volume>62</volume><issue>7</issue><fpage>1382</fpage><lpage>1402</lpage><comment><ext-link ext-link-type="uri" xlink:href="http://doi.wiley.com/10.1002/asi.v62.7">http://doi.wiley.com/10.1002/asi.v62.7</ext-link></comment><pub-id pub-id-type="doi">10.1002/asi.21525</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Montazeri</surname><given-names>A</given-names> </name><name name-style="western"><surname>Mohammadi</surname><given-names>S</given-names> </name><name name-style="western"><surname>M Hesari</surname><given-names>P</given-names> </name><name name-style="western"><surname>Ghaemi</surname><given-names>M</given-names> </name><name name-style="western"><surname>Riazi</surname><given-names>H</given-names> </name><name name-style="western"><surname>Sheikhi-Mobarakeh</surname><given-names>Z</given-names> </name></person-group><article-title>Preliminary guideline for reporting bibliometric reviews of the biomedical literature (BIBLIO): a minimum requirements</article-title><source>Syst Rev</source><year>2023</year><month>12</month><day>15</day><volume>12</volume><issue>1</issue><fpage>239</fpage><pub-id pub-id-type="doi">10.1186/s13643-023-02410-2</pub-id><pub-id pub-id-type="medline">38102710</pub-id></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation 
citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Arksey</surname><given-names>H</given-names> </name><name name-style="western"><surname>O&#x2019;Malley</surname><given-names>L</given-names> </name></person-group><article-title>Scoping studies: towards a methodological framework</article-title><source>Int J Soc Res Methodol</source><year>2005</year><month>02</month><volume>8</volume><issue>1</issue><fpage>19</fpage><lpage>32</lpage><pub-id pub-id-type="doi">10.1080/1364557032000119616</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Tricco</surname><given-names>AC</given-names> </name><name name-style="western"><surname>Lillie</surname><given-names>E</given-names> </name><name name-style="western"><surname>Zarin</surname><given-names>W</given-names> </name><etal/></person-group><article-title>PRISMA Extension for Scoping Reviews (PRISMA-ScR): checklist and explanation</article-title><source>Ann Intern Med</source><year>2018</year><month>10</month><day>2</day><volume>169</volume><issue>7</issue><fpage>467</fpage><lpage>473</lpage><pub-id pub-id-type="doi">10.7326/M18-0850</pub-id><pub-id pub-id-type="medline">30178033</pub-id></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gibney</surname><given-names>E</given-names> </name></person-group><article-title>China&#x2019;s cheap, open AI model DeepSeek thrills scientists</article-title><source>Nature</source><year>2025</year><month>02</month><day>6</day><volume>638</volume><issue>8049</issue><fpage>13</fpage><lpage>14</lpage><pub-id pub-id-type="doi">10.1038/d41586-025-00229-6</pub-id></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="journal"><person-group 
person-group-type="author"><name name-style="western"><surname>Conroy</surname><given-names>G</given-names> </name><name name-style="western"><surname>Mallapaty</surname><given-names>S</given-names> </name></person-group><article-title>How China created AI model DeepSeek and shocked the world</article-title><source>Nature</source><year>2025</year><month>02</month><day>13</day><volume>638</volume><issue>8050</issue><fpage>300</fpage><lpage>301</lpage><pub-id pub-id-type="doi">10.1038/d41586-025-00259-0</pub-id></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zhou</surname><given-names>M</given-names> </name><name name-style="western"><surname>Pan</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Song</surname><given-names>X</given-names> </name><name name-style="western"><surname>Zhou</surname><given-names>Y</given-names> </name></person-group><article-title>Evaluating AI-generated patient education materials for spinal surgeries: comparative analysis of readability and DISCERN quality across ChatGPT and DeepSeek models</article-title><source>Int J Med Inform</source><year>2025</year><month>06</month><volume>198</volume><fpage>105871</fpage><pub-id pub-id-type="doi">10.1016/j.ijmedinf.2025.105871</pub-id><pub-id pub-id-type="medline">40107040</pub-id></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Deng</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Ma</surname><given-names>W</given-names> </name><name name-style="western"><surname>Han</surname><given-names>QL</given-names> </name><etal/></person-group><article-title>Exploring DeepSeek: a survey on advances, applications, 
challenges and future directions</article-title><source>IEEE/CAA J Autom Sinica</source><year>2025</year><month>05</month><volume>12</volume><issue>5</issue><fpage>872</fpage><lpage>893</lpage><pub-id pub-id-type="doi">10.1109/JAS.2025.125498</pub-id></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kaygisiz</surname><given-names>&#x00D6;F</given-names> </name><name name-style="western"><surname>Teke</surname><given-names>MT</given-names> </name></person-group><article-title>Can DeepSeek and ChatGPT be used in the diagnosis of oral pathologies?</article-title><source>BMC Oral Health</source><year>2025</year><month>04</month><day>25</day><volume>25</volume><issue>1</issue><fpage>638</fpage><pub-id pub-id-type="doi">10.1186/s12903-025-06034-x</pub-id><pub-id pub-id-type="medline">40281436</pub-id></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Rasool</surname><given-names>A</given-names> </name><name name-style="western"><surname>Shahzad</surname><given-names>MI</given-names> </name><name name-style="western"><surname>Aslam</surname><given-names>H</given-names> </name><name name-style="western"><surname>Chan</surname><given-names>V</given-names> </name><name name-style="western"><surname>Arshad</surname><given-names>MA</given-names> </name></person-group><article-title>Emotion-aware embedding fusion in large language models (Flan-T5, Llama 2, DeepSeek-R1, and ChatGPT 4) for intelligent response generation</article-title><source>AI</source><year>2025</year><month>03</month><day>13</day><volume>6</volume><issue>3</issue><fpage>56</fpage><pub-id pub-id-type="doi">10.3390/ai6030056</pub-id></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Yilmaz</surname><given-names>BE</given-names> </name><name name-style="western"><surname>Gokkurt Yilmaz</surname><given-names>BN</given-names> </name><name name-style="western"><surname>Ozbey</surname><given-names>F</given-names> </name></person-group><article-title>Artificial intelligence performance in answering multiple-choice oral pathology questions: a comparative analysis</article-title><source>BMC Oral Health</source><year>2025</year><month>04</month><day>15</day><volume>25</volume><issue>1</issue><fpage>573</fpage><pub-id pub-id-type="doi">10.1186/s12903-025-05926-2</pub-id><pub-id pub-id-type="medline">40234873</pub-id></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Marcaccini</surname><given-names>G</given-names> </name><name name-style="western"><surname>Seth</surname><given-names>I</given-names> </name><name name-style="western"><surname>Xie</surname><given-names>Y</given-names> </name><etal/></person-group><article-title>Breaking bones, breaking barriers: ChatGPT, DeepSeek, and Gemini in hand fracture management</article-title><source>J Clin Med</source><year>2025</year><month>03</month><day>14</day><volume>14</volume><issue>6</issue><fpage>1983</fpage><pub-id pub-id-type="doi">10.3390/jcm14061983</pub-id><pub-id pub-id-type="medline">40142791</pub-id></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>&#x00D6;zcivelek</surname><given-names>T</given-names> </name><name name-style="western"><surname>&#x00D6;zcan</surname><given-names>B</given-names> </name></person-group><article-title>Comparative evaluation of responses from DeepSeek-R1, ChatGPT-o1, ChatGPT-4, and dental GPT chatbots to patient inquiries about dental and maxillofacial prostheses</article-title><source>BMC Oral 
Health</source><year>2025</year><month>05</month><day>31</day><volume>25</volume><issue>1</issue><fpage>871</fpage><pub-id pub-id-type="doi">10.1186/s12903-025-06267-w</pub-id><pub-id pub-id-type="medline">40450291</pub-id></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>G&#x00FC;ltekin</surname><given-names>O</given-names> </name><name name-style="western"><surname>Inoue</surname><given-names>J</given-names> </name><name name-style="western"><surname>Yilmaz</surname><given-names>B</given-names> </name><etal/></person-group><article-title>Evaluating DeepResearch and DeepThink in anterior cruciate ligament surgery patient education: ChatGPT-4o excels in comprehensiveness, DeepSeek R1 leads in clarity and readability of orthopaedic information</article-title><source>Knee Surg Sports Traumatol Arthrosc</source><year>2025</year><month>08</month><volume>33</volume><issue>8</issue><fpage>3025</fpage><lpage>3031</lpage><pub-id pub-id-type="doi">10.1002/ksa.12711</pub-id><pub-id pub-id-type="medline">40450565</pub-id></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Seth</surname><given-names>I</given-names> </name><name name-style="western"><surname>Marcaccini</surname><given-names>G</given-names> </name><name name-style="western"><surname>Lim</surname><given-names>K</given-names> </name><etal/></person-group><article-title>Management of Dupuytren&#x2019;s disease: a multi-centric comparative analysis between experienced hand surgeons versus artificial intelligence</article-title><source>Diagnostics (Basel)</source><year>2025</year><month>02</month><day>28</day><volume>15</volume><issue>5</issue><fpage>587</fpage><pub-id pub-id-type="doi">10.3390/diagnostics15050587</pub-id><pub-id 
pub-id-type="medline">40075834</pub-id></nlm-citation></ref><ref id="ref32"><label>32</label><nlm-citation citation-type="web"><source>VOSviewer</source><access-date>2026-06-10</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.vosviewer.com/">https://www.vosviewer.com/</ext-link></comment></nlm-citation></ref><ref id="ref33"><label>33</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>van Eck</surname><given-names>NJ</given-names> </name><name name-style="western"><surname>Waltman</surname><given-names>L</given-names> </name></person-group><article-title>Software survey: VOSviewer, a computer program for bibliometric mapping</article-title><source>Scientometrics</source><year>2010</year><volume>84</volume><fpage>523</fpage><lpage>538</lpage><pub-id pub-id-type="doi">10.1007/s11192-009-0146-3</pub-id><pub-id pub-id-type="medline">20585380</pub-id></nlm-citation></ref><ref id="ref34"><label>34</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lau</surname><given-names>JYS</given-names> </name><name name-style="western"><surname>Gerald Sng</surname><given-names>GR</given-names> </name><name name-style="western"><surname>Cao</surname><given-names>R</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>J</given-names> </name></person-group><article-title>A comparative study of ChatGPT and DeepSeek in spinal cord injury patient education: can artificial intelligence &#x201C;speak&#x201D; spinal cord injury?</article-title><source>J Spinal Cord Med</source><year>2026</year><month>05</month><volume>49</volume><issue>3</issue><fpage>618</fpage><lpage>623</lpage><pub-id pub-id-type="doi">10.1080/10790268.2025.2554013</pub-id><pub-id pub-id-type="medline">40938207</pub-id></nlm-citation></ref><ref id="ref35"><label>35</label><nlm-citation citation-type="journal"><person-group 
person-group-type="author"><name name-style="western"><surname>Liu</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Yu</surname><given-names>F</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>X</given-names> </name><etal/></person-group><article-title>Assessing the role of large language models between ChatGPT and DeepSeek in asthma education for bilingual individuals: comparative study</article-title><source>JMIR Med Inform</source><year>2025</year><month>08</month><day>13</day><volume>13</volume><fpage>e65365</fpage><pub-id pub-id-type="doi">10.2196/65365</pub-id><pub-id pub-id-type="medline">40802989</pub-id></nlm-citation></ref><ref id="ref36"><label>36</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Uldin</surname><given-names>H</given-names> </name><name name-style="western"><surname>Saran</surname><given-names>S</given-names> </name><name name-style="western"><surname>Gandikota</surname><given-names>G</given-names> </name><etal/></person-group><article-title>A comparison of performance of DeepSeek-R1 model-generated responses to musculoskeletal radiology queries against ChatGPT-4 and ChatGPT-4o - a feasibility study</article-title><source>Clin Imaging</source><year>2025</year><month>07</month><volume>123</volume><fpage>110506</fpage><pub-id pub-id-type="doi">10.1016/j.clinimag.2025.110506</pub-id><pub-id pub-id-type="medline">40381536</pub-id></nlm-citation></ref><ref id="ref37"><label>37</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wu</surname><given-names>H</given-names> </name><name name-style="western"><surname>Yao</surname><given-names>S</given-names> </name><name name-style="western"><surname>Bao</surname><given-names>H</given-names> </name><name name-style="western"><surname>Guo</surname><given-names>Y</given-names> </name><name 
name-style="western"><surname>Xu</surname><given-names>C</given-names> </name><name name-style="western"><surname>Ma</surname><given-names>J</given-names> </name></person-group><article-title>ChatGPT-4.0 and DeepSeek-R1 does not yet provide clinically supported answers for knee osteoarthritis</article-title><source>Knee</source><year>2025</year><month>10</month><volume>56</volume><fpage>386</fpage><lpage>396</lpage><pub-id pub-id-type="doi">10.1016/j.knee.2025.06.007</pub-id><pub-id pub-id-type="medline">40618549</pub-id></nlm-citation></ref><ref id="ref38"><label>38</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Alluri</surname><given-names>AA</given-names> </name><name name-style="western"><surname>Khan</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Krithika</surname><given-names>V</given-names> </name><etal/></person-group><article-title>Assessing the suitability of ChatGPT and DeepSeek AI for patient education on common rheumatological disorders</article-title><source>Cureus</source><year>2025</year><month>08</month><volume>17</volume><issue>8</issue><fpage>e90600</fpage><pub-id pub-id-type="doi">10.7759/cureus.90600</pub-id><pub-id pub-id-type="medline">40984935</pub-id></nlm-citation></ref><ref id="ref39"><label>39</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gurbuz</surname><given-names>S</given-names> </name><name name-style="western"><surname>Bahar</surname><given-names>H</given-names> </name><name name-style="western"><surname>Yavuz</surname><given-names>U</given-names> </name><name name-style="western"><surname>Keskin</surname><given-names>A</given-names> </name><name name-style="western"><surname>Karslioglu</surname><given-names>B</given-names> </name><name name-style="western"><surname>Solak</surname><given-names>Y</given-names> 
</name></person-group><article-title>Comparative efficacy of ChatGPT and DeepSeek in addressing patient queries on gonarthrosis and total knee arthroplasty</article-title><source>Arthroplast Today</source><year>2025</year><month>06</month><volume>33</volume><fpage>101730</fpage><pub-id pub-id-type="doi">10.1016/j.artd.2025.101730</pub-id><pub-id pub-id-type="medline">40521295</pub-id></nlm-citation></ref><ref id="ref40"><label>40</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zhang</surname><given-names>J</given-names> </name><name name-style="western"><surname>Liu</surname><given-names>J</given-names> </name><name name-style="western"><surname>Guo</surname><given-names>M</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>X</given-names> </name><name name-style="western"><surname>Xiao</surname><given-names>W</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>F</given-names> </name></person-group><article-title>DeepSeek-assisted LI-RADS classification: AI-driven precision in hepatocellular carcinoma diagnosis</article-title><source>Int J Surg</source><year>2025</year><volume>111</volume><issue>9</issue><fpage>5970</fpage><lpage>5979</lpage><pub-id pub-id-type="doi">10.1097/JS9.0000000000002763</pub-id></nlm-citation></ref><ref id="ref41"><label>41</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Vural Camalan</surname><given-names>B</given-names> </name><name name-style="western"><surname>Doluoglu</surname><given-names>S</given-names> </name><name name-style="western"><surname>Taraf</surname><given-names>NH</given-names> </name><name name-style="western"><surname>Gunay</surname><given-names>MM</given-names> </name><name name-style="western"><surname>Ozlugedik</surname><given-names>S</given-names> </name></person-group><article-title>ChatGPT versus 
DeepSeek in head and neck cancer staging and treatment planning: guideline-based study</article-title><source>Eur Arch Otorhinolaryngol</source><year>2025</year><month>09</month><volume>282</volume><issue>9</issue><fpage>4815</fpage><lpage>4824</lpage><pub-id pub-id-type="doi">10.1007/s00405-025-09524-4</pub-id><pub-id pub-id-type="medline">40523995</pub-id></nlm-citation></ref><ref id="ref42"><label>42</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Mikhail</surname><given-names>D</given-names> </name><name name-style="western"><surname>Farah</surname><given-names>A</given-names> </name><name name-style="western"><surname>Milad</surname><given-names>J</given-names> </name><etal/></person-group><article-title>DeepSeek-R1 vs OpenAI o1 for ophthalmic diagnoses and management plans</article-title><source>JAMA Ophthalmol</source><year>2025</year><month>10</month><day>1</day><volume>143</volume><issue>10</issue><fpage>834</fpage><lpage>842</lpage><pub-id pub-id-type="doi">10.1001/jamaophthalmol.2025.2918</pub-id><pub-id pub-id-type="medline">40906471</pub-id></nlm-citation></ref><ref id="ref43"><label>43</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Cao</surname><given-names>H</given-names> </name><name name-style="western"><surname>Hao</surname><given-names>C</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>T</given-names> </name><etal/></person-group><article-title>Battle of the artificial intelligence: a comprehensive comparative analysis of DeepSeek and ChatGPT for urinary incontinence-related questions</article-title><source>Front Public Health</source><year>2025</year><volume>13</volume><fpage>1605908</fpage><pub-id pub-id-type="doi">10.3389/fpubh.2025.1605908</pub-id><pub-id pub-id-type="medline">40771241</pub-id></nlm-citation></ref><ref id="ref44"><label>44</label><nlm-citation 
citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Pinto</surname><given-names>VBP</given-names> </name><name name-style="western"><surname>Ata&#x00ED;des</surname><given-names>RJC</given-names> </name><name name-style="western"><surname>do Nascimento</surname><given-names>LAP</given-names> </name><etal/></person-group><article-title>Performance of ChatGPT and DeepSeek in the management of postprostatectomy urinary incontinence</article-title><source>Int Braz J Urol</source><year>2025</year><volume>51</volume><issue>6</issue><fpage>e20250325</fpage><pub-id pub-id-type="doi">10.1590/S1677-5538.IBJU.2025.0325</pub-id><pub-id pub-id-type="medline">40857549</pub-id></nlm-citation></ref><ref id="ref45"><label>45</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ibrahim</surname><given-names>AF</given-names> </name><name name-style="western"><surname>Danpanichkul</surname><given-names>P</given-names> </name><name name-style="western"><surname>Hayek</surname><given-names>A</given-names> </name><etal/></person-group><article-title>Artificial intelligence in gastroenterology education: DeepSeek passes the gastroenterology board examination and outperforms legacy ChatGPT models</article-title><source>Am J Gastroenterol</source><year>2026</year><month>04</month><day>1</day><volume>121</volume><issue>4</issue><fpage>1041</fpage><lpage>1043</lpage><pub-id pub-id-type="doi">10.14309/ajg.0000000000003552</pub-id><pub-id pub-id-type="medline">40392256</pub-id></nlm-citation></ref><ref id="ref46"><label>46</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Meo</surname><given-names>SA</given-names> </name><name name-style="western"><surname>Abukhalaf</surname><given-names>FA</given-names> </name><name name-style="western"><surname>ElToukhy</surname><given-names>RA</given-names> </name><name 
name-style="western"><surname>Sattar</surname><given-names>K</given-names> </name></person-group><article-title>Exploring the role of DeepSeek-R1, ChatGPT-4, and Google Gemini in medical education: how valid and reliable are they?</article-title><source>Pak J Med Sci</source><year>2025</year><month>07</month><volume>41</volume><issue>7</issue><fpage>1887</fpage><lpage>1892</lpage><pub-id pub-id-type="doi">10.12669/pjms.41.7.12183</pub-id><pub-id pub-id-type="medline">40735572</pub-id></nlm-citation></ref><ref id="ref47"><label>47</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Shean</surname><given-names>R</given-names> </name><name name-style="western"><surname>Shah</surname><given-names>T</given-names> </name><name name-style="western"><surname>Pandiarajan</surname><given-names>A</given-names> </name><etal/></person-group><article-title>A comparative analysis of DeepSeek R1, DeepSeek-R1-Lite, OpenAi o1 Pro, and Grok 3 performance on ophthalmology board-style questions</article-title><source>Sci Rep</source><year>2025</year><month>07</month><day>2</day><volume>15</volume><issue>1</issue><fpage>23101</fpage><pub-id pub-id-type="doi">10.1038/s41598-025-08601-2</pub-id><pub-id pub-id-type="medline">40595291</pub-id></nlm-citation></ref><ref id="ref48"><label>48</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Tassoker</surname><given-names>M</given-names> </name></person-group><article-title>Who knows anatomy best? 
A comparative study of ChatGPT-4o, DeepSeek, Gemini, and Claude</article-title><source>Clin Anat</source><year>2026</year><month>01</month><volume>39</volume><issue>1</issue><fpage>25</fpage><lpage>29</lpage><pub-id pub-id-type="doi">10.1002/ca.70012</pub-id><pub-id pub-id-type="medline">40708277</pub-id></nlm-citation></ref><ref id="ref49"><label>49</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ucdal</surname><given-names>M</given-names> </name><name name-style="western"><surname>Yurtsever</surname><given-names>K</given-names> </name><name name-style="western"><surname>Yildiz</surname><given-names>P</given-names> </name><name name-style="western"><surname>Akalin</surname><given-names>A</given-names> </name><name name-style="western"><surname>Mert</surname><given-names>KU</given-names> </name><name name-style="western"><surname>Guven</surname><given-names>GS</given-names> </name></person-group><article-title>Comparison of artificial intelligence models and human experts in managing dyslipidemia: assessment of adherence to clinical guidelines</article-title><source>Cureus</source><year>2025</year><month>08</month><volume>17</volume><issue>8</issue><fpage>e91363</fpage><pub-id pub-id-type="doi">10.7759/cureus.91363</pub-id><pub-id pub-id-type="medline">40904968</pub-id></nlm-citation></ref><ref id="ref50"><label>50</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lee</surname><given-names>S</given-names> </name><name name-style="western"><surname>Jung</surname><given-names>S</given-names> </name><name name-style="western"><surname>Park</surname><given-names>JH</given-names> </name><name name-style="western"><surname>Cho</surname><given-names>H</given-names> </name><name name-style="western"><surname>Moon</surname><given-names>S</given-names> </name><name 
name-style="western"><surname>Ahn</surname><given-names>S</given-names> </name></person-group><article-title>Performance of ChatGPT, Gemini and DeepSeek for non-critical triage support using real-world conversations in emergency department</article-title><source>BMC Emerg Med</source><year>2025</year><month>09</month><day>1</day><volume>25</volume><issue>1</issue><fpage>176</fpage><pub-id pub-id-type="doi">10.1186/s12873-025-01337-2</pub-id><pub-id pub-id-type="medline">40890624</pub-id></nlm-citation></ref><ref id="ref51"><label>51</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Tao</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Li</surname><given-names>X</given-names> </name><name name-style="western"><surname>Yisha</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Yang</surname><given-names>S</given-names> </name><name name-style="western"><surname>Zhan</surname><given-names>S</given-names> </name><name name-style="western"><surname>Sun</surname><given-names>F</given-names> </name></person-group><article-title>LitAutoScreener: development and validation of an automated literature screening tool in evidence-based medicine driven by large language models</article-title><source>Health Data Sci</source><year>2025</year><volume>5</volume><fpage>0322</fpage><pub-id pub-id-type="doi">10.34133/hds.0322</pub-id><pub-id pub-id-type="medline">40904687</pub-id></nlm-citation></ref><ref id="ref52"><label>52</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ruan</surname><given-names>M</given-names> </name><name name-style="western"><surname>Fan</surname><given-names>J</given-names> </name><name name-style="western"><surname>Liu</surname><given-names>M</given-names> </name><name name-style="western"><surname>Meng</surname><given-names>Z</given-names> </name><name 
name-style="western"><surname>Zhang</surname><given-names>X</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>C</given-names> </name></person-group><article-title>Artificial intelligence for the science of evidence synthesis: how good are AI-powered tools for automatic literature screening?</article-title><source>BMC Med Res Methodol</source><year>2025</year><month>08</month><day>25</day><volume>25</volume><issue>1</issue><fpage>199</fpage><pub-id pub-id-type="doi">10.1186/s12874-025-02644-9</pub-id><pub-id pub-id-type="medline">40855531</pub-id></nlm-citation></ref><ref id="ref53"><label>53</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Cai</surname><given-names>X</given-names> </name><name name-style="western"><surname>Geng</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Du</surname><given-names>Y</given-names> </name><etal/></person-group><article-title>Utilizing large language models to select literature for meta-analysis shows workload reduction while maintaining a similar recall level as manual curation</article-title><source>BMC Med Res Methodol</source><year>2025</year><month>04</month><day>28</day><volume>25</volume><issue>1</issue><fpage>116</fpage><pub-id pub-id-type="doi">10.1186/s12874-025-02569-3</pub-id><pub-id pub-id-type="medline">40295957</pub-id></nlm-citation></ref><ref id="ref54"><label>54</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Young</surname><given-names>RJ</given-names> </name><name name-style="western"><surname>Matthews</surname><given-names>AM</given-names> </name><name name-style="western"><surname>Poston</surname><given-names>B</given-names> </name></person-group><article-title>Benchmarking multiple large language models for automated clinical trial data extraction in aging 
research</article-title><source>Algorithms</source><year>2025</year><volume>18</volume><issue>5</issue><fpage>296</fpage><pub-id pub-id-type="doi">10.3390/a18050296</pub-id></nlm-citation></ref><ref id="ref55"><label>55</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Grillo</surname><given-names>R</given-names> </name><name name-style="western"><surname>Llanos</surname><given-names>AH</given-names> </name><name name-style="western"><surname>Costa</surname><given-names>C</given-names> </name><name name-style="western"><surname>Melhem-Elias</surname><given-names>F</given-names> </name></person-group><article-title>Comparison of large language models in oral and maxillofacial surgery</article-title><source>Br J Oral Maxillofac Surg</source><year>2026</year><month>01</month><volume>64</volume><issue>1</issue><fpage>43</fpage><lpage>49</lpage><pub-id pub-id-type="doi">10.1016/j.bjoms.2025.08.015</pub-id><pub-id pub-id-type="medline">41076417</pub-id></nlm-citation></ref><ref id="ref56"><label>56</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Scholz</surname><given-names>V</given-names> </name><name name-style="western"><surname>Bichtemann</surname><given-names>S</given-names> </name><name name-style="western"><surname>Bott</surname><given-names>OJ</given-names> </name><name name-style="western"><surname>Illig</surname><given-names>T</given-names> </name><name name-style="western"><surname>Haag</surname><given-names>S</given-names> </name></person-group><article-title>AI for extracting pre-analytical variability data from biomedical literature: feasibility and validation</article-title><source>Stud Health Technol Inform</source><year>2025</year><month>09</month><day>3</day><volume>331</volume><fpage>52</fpage><lpage>62</lpage><pub-id pub-id-type="doi">10.3233/SHTI251379</pub-id><pub-id 
pub-id-type="medline">40899527</pub-id></nlm-citation></ref><ref id="ref57"><label>57</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wu</surname><given-names>X</given-names> </name><name name-style="western"><surname>Cai</surname><given-names>G</given-names> </name><name name-style="western"><surname>Guo</surname><given-names>B</given-names> </name><etal/></person-group><article-title>A multi-dimensional performance evaluation of large language models in dental implantology: comparison of ChatGPT, DeepSeek, Grok, Gemini and Qwen across diverse clinical scenarios</article-title><source>BMC Oral Health</source><year>2025</year><month>07</month><day>28</day><volume>25</volume><issue>1</issue><fpage>1272</fpage><pub-id pub-id-type="doi">10.1186/s12903-025-06619-6</pub-id><pub-id pub-id-type="medline">40721763</pub-id></nlm-citation></ref><ref id="ref58"><label>58</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Li</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Dong</surname><given-names>J</given-names> </name><name name-style="western"><surname>Liu</surname><given-names>D</given-names> </name><etal/></person-group><article-title>Systematic benchmarking of large language models in programmed cell death-oriented gastric cancer research: a comparative analysis of DeepSeek&#x2011;V3, DeepSeek&#x2011;R1, and Claude 3.5</article-title><source>Discov Oncol</source><year>2025</year><month>07</month><day>1</day><volume>16</volume><issue>1</issue><fpage>1227</fpage><pub-id pub-id-type="doi">10.1007/s12672-025-02911-7</pub-id></nlm-citation></ref><ref id="ref59"><label>59</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kayaalp</surname><given-names>ME</given-names> </name><name 
name-style="western"><surname>G&#x00FC;ltekin</surname><given-names>O</given-names> </name><name name-style="western"><surname>Ak&#x00E7;aalan</surname><given-names>S</given-names> </name><name name-style="western"><surname>Kahraman</surname><given-names>H&#x00C7;</given-names> </name><name name-style="western"><surname>Top&#x00E7;u</surname><given-names>HN</given-names> </name><name name-style="western"><surname>Kavrul Kayaalp</surname><given-names>G</given-names> </name></person-group><article-title>Artificial intelligence in medical and biological research: promise and perils of ChatGPT and DeepSeek in advancing healthcare</article-title><source>Turk J Biol</source><year>2025</year><volume>49</volume><issue>5</issue><fpage>585</fpage><lpage>599</lpage><pub-id pub-id-type="doi">10.55730/1300-0152.2765</pub-id><pub-id pub-id-type="medline">41246235</pub-id></nlm-citation></ref><ref id="ref60"><label>60</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Abuabara</surname><given-names>A</given-names> </name><name name-style="western"><surname>do Nascimento</surname><given-names>T</given-names> </name><name name-style="western"><surname>Trentini</surname><given-names>SM</given-names> </name><etal/></person-group><article-title>Evaluating the accuracy of generative artificial intelligence models in dental age estimation based on the Demirjian&#x2019;s method</article-title><source>Front Dent Med</source><year>2025</year><volume>6</volume><fpage>1634006</fpage><pub-id pub-id-type="doi">10.3389/fdmed.2025.1634006</pub-id><pub-id pub-id-type="medline">40800006</pub-id></nlm-citation></ref><ref id="ref61"><label>61</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>AlShahwan</surname><given-names>N</given-names> </name><name name-style="western"><surname>Fetyani</surname><given-names>IM</given-names> </name><name 
name-style="western"><surname>Beyari</surname><given-names>MB</given-names> </name><etal/></person-group><article-title>Comparative performance analysis of AI engines in answering American Board of Surgery in-training examination questions: a multi-subspecialty evaluation</article-title><source>Surg Innov</source><year>2025</year><month>12</month><volume>32</volume><issue>6</issue><fpage>502</fpage><lpage>506</lpage><pub-id pub-id-type="doi">10.1177/15533506251361664</pub-id><pub-id pub-id-type="medline">40664612</pub-id></nlm-citation></ref><ref id="ref62"><label>62</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Yang</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Yang</surname><given-names>F</given-names> </name><name name-style="western"><surname>Xiao</surname><given-names>S</given-names> </name><etal/></person-group><article-title>Application of large language models in TN staging and treatment response evaluation for patients with nasopharyngeal carcinoma: a comparative performance analysis of ChatGPT-4o-Latest and DeepSeek-V3-0324</article-title><source>J Magn Reson Imaging</source><year>2025</year><month>12</month><volume>62</volume><issue>6</issue><fpage>1793</fpage><lpage>1801</lpage><pub-id pub-id-type="doi">10.1002/jmri.70140</pub-id><pub-id pub-id-type="medline">41045017</pub-id></nlm-citation></ref><ref id="ref63"><label>63</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Yan</surname><given-names>MY</given-names> </name><name name-style="western"><surname>Qin</surname><given-names>JA</given-names> </name><name name-style="western"><surname>Yan</surname><given-names>D</given-names> </name></person-group><article-title>Performance evaluation and application value of large language models in the prediction of drug-drug interactions</article-title><source>Yaoxue 
Xuebao</source><year>2025</year><volume>60</volume><issue>7</issue><fpage>2122</fpage><lpage>2131</lpage><pub-id pub-id-type="doi">10.16438/j.0513-4870.2025-0590</pub-id></nlm-citation></ref><ref id="ref64"><label>64</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Xie</surname><given-names>L</given-names> </name><name name-style="western"><surname>Jin</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Xu</surname><given-names>L</given-names> </name><name name-style="western"><surname>Chang</surname><given-names>S</given-names> </name><name name-style="western"><surname>Xu</surname><given-names>X</given-names> </name></person-group><article-title>Fusing domain knowledge with a fine-tuned large language model for enhanced molecular property prediction</article-title><source>J Chem Theory Comput</source><year>2025</year><month>07</month><day>22</day><volume>21</volume><issue>14</issue><fpage>6743</fpage><lpage>6758</lpage><pub-id pub-id-type="doi">10.1021/acs.jctc.5c00605</pub-id><pub-id pub-id-type="medline">40631446</pub-id></nlm-citation></ref><ref id="ref65"><label>65</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>McCoy</surname><given-names>TH</given-names> </name><name name-style="western"><surname>Perlis</surname><given-names>RH</given-names> </name></person-group><article-title>Reasoning language models for more transparent prediction of suicide risk</article-title><source>BMJ Ment Health</source><year>2025</year><month>05</month><day>11</day><volume>28</volume><issue>1</issue><fpage>e301654</fpage><pub-id pub-id-type="doi">10.1136/bmjment-2025-301654</pub-id><pub-id pub-id-type="medline">40350181</pub-id></nlm-citation></ref><ref id="ref66"><label>66</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Alessandri-Bonetti</surname><given-names>M</given-names> </name><name name-style="western"><surname>Liu</surname><given-names>HY</given-names> </name><name name-style="western"><surname>Giorgino</surname><given-names>R</given-names> </name><name name-style="western"><surname>Nguyen</surname><given-names>VT</given-names> </name><name name-style="western"><surname>Egro</surname><given-names>FM</given-names> </name></person-group><article-title>The first months of life of ChatGPT and its impact in healthcare: a bibliometric analysis of the current literature</article-title><source>Ann Biomed Eng</source><year>2024</year><month>05</month><volume>52</volume><issue>5</issue><fpage>1107</fpage><lpage>1110</lpage><pub-id pub-id-type="doi">10.1007/s10439-023-03325-8</pub-id><pub-id pub-id-type="medline">37482572</pub-id></nlm-citation></ref><ref id="ref67"><label>67</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Chen</surname><given-names>SF</given-names> </name><name name-style="western"><surname>Alyakin</surname><given-names>A</given-names> </name><name name-style="western"><surname>Seas</surname><given-names>A</given-names> </name><etal/></person-group><article-title>LLM-assisted systematic review of large language models in clinical medicine</article-title><source>Nat Med</source><year>2026</year><month>03</month><volume>32</volume><issue>3</issue><fpage>1152</fpage><lpage>1159</lpage><pub-id pub-id-type="doi">10.1038/s41591-026-04229-5</pub-id><pub-id pub-id-type="medline">41776077</pub-id></nlm-citation></ref><ref id="ref68"><label>68</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Anusitviwat</surname><given-names>C</given-names> </name><name name-style="western"><surname>Suwannaphisit</surname><given-names>S</given-names> </name><name 
name-style="western"><surname>Bvonpanttarananon</surname><given-names>J</given-names> </name><name name-style="western"><surname>Tangtrakulwanich</surname><given-names>B</given-names> </name></person-group><article-title>Comparing ChatGPT and DeepSeek for assessment of multiple-choice questions in orthopedic medical education: cross-sectional study</article-title><source>JMIR Form Res</source><year>2025</year><month>12</month><day>19</day><volume>9</volume><fpage>e75607</fpage><pub-id pub-id-type="doi">10.2196/75607</pub-id><pub-id pub-id-type="medline">41418321</pub-id></nlm-citation></ref><ref id="ref69"><label>69</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Cascella</surname><given-names>M</given-names> </name><name name-style="western"><surname>Montomoli</surname><given-names>J</given-names> </name><name name-style="western"><surname>Bellini</surname><given-names>V</given-names> </name><name name-style="western"><surname>Bignami</surname><given-names>E</given-names> </name></person-group><article-title>Evaluating the feasibility of ChatGPT in healthcare: an analysis of multiple clinical and research scenarios</article-title><source>J Med Syst</source><year>2023</year><month>03</month><day>4</day><volume>47</volume><issue>1</issue><fpage>33</fpage><pub-id pub-id-type="doi">10.1007/s10916-023-01925-4</pub-id><pub-id pub-id-type="medline">36869927</pub-id></nlm-citation></ref><ref id="ref70"><label>70</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Thirunavukarasu</surname><given-names>AJ</given-names> </name><name name-style="western"><surname>Ting</surname><given-names>DSJ</given-names> </name><name name-style="western"><surname>Elangovan</surname><given-names>K</given-names> </name><name name-style="western"><surname>Gutierrez</surname><given-names>L</given-names> </name><name 
name-style="western"><surname>Tan</surname><given-names>TF</given-names> </name><name name-style="western"><surname>Ting</surname><given-names>DSW</given-names> </name></person-group><article-title>Large language models in medicine</article-title><source>Nat Med</source><year>2023</year><month>08</month><volume>29</volume><issue>8</issue><fpage>1930</fpage><lpage>1940</lpage><pub-id pub-id-type="doi">10.1038/s41591-023-02448-8</pub-id><pub-id pub-id-type="medline">37460753</pub-id></nlm-citation></ref><ref id="ref71"><label>71</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>He</surname><given-names>K</given-names> </name><name name-style="western"><surname>Mao</surname><given-names>R</given-names> </name><name name-style="western"><surname>Lin</surname><given-names>Q</given-names> </name><etal/></person-group><article-title>A survey of large language models for healthcare: from data, technology, and applications to accountability and ethics</article-title><source>Inf Fusion</source><year>2025</year><month>06</month><volume>118</volume><fpage>102963</fpage><pub-id pub-id-type="doi">10.1016/j.inffus.2025.102963</pub-id></nlm-citation></ref><ref id="ref72"><label>72</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Liu</surname><given-names>F</given-names> </name><name name-style="western"><surname>Zhou</surname><given-names>H</given-names> </name><name name-style="western"><surname>Gu</surname><given-names>B</given-names> </name><etal/></person-group><article-title>Application of large language models in medicine</article-title><source>Nat Rev Bioeng</source><year>2025</year><month>07</month><volume>3</volume><issue>6</issue><fpage>445</fpage><lpage>464</lpage><pub-id pub-id-type="doi">10.1038/s44222-025-00279-5</pub-id></nlm-citation></ref><ref id="ref73"><label>73</label><nlm-citation 
citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Unger</surname><given-names>JP</given-names> </name><name name-style="western"><surname>Morales</surname><given-names>I</given-names> </name><name name-style="western"><surname>De Paepe</surname><given-names>P</given-names> </name><name name-style="western"><surname>Roland</surname><given-names>M</given-names> </name></person-group><article-title>Integrating clinical and public health knowledge in support of joint medical practice</article-title><source>BMC Health Serv Res</source><year>2020</year><month>12</month><day>9</day><volume>20</volume><issue>Suppl 2</issue><fpage>1073</fpage><pub-id pub-id-type="doi">10.1186/s12913-020-05886-z</pub-id><pub-id pub-id-type="medline">33292211</pub-id></nlm-citation></ref><ref id="ref74"><label>74</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hassanein</surname><given-names>FEA</given-names> </name><name name-style="western"><surname>El Barbary</surname><given-names>A</given-names> </name><name name-style="western"><surname>Hussein</surname><given-names>RR</given-names> </name><etal/></person-group><article-title>Diagnostic performance of ChatGPT-4o and DeepSeek-3 differential diagnosis of complex oral lesions: a multimodal imaging and case difficulty analysis</article-title><source>Oral Dis</source><year>2025</year><month>12</month><volume>31</volume><issue>12</issue><fpage>3361</fpage><lpage>3371</lpage><pub-id pub-id-type="doi">10.1111/odi.70007</pub-id><pub-id pub-id-type="medline">40589366</pub-id></nlm-citation></ref><ref id="ref75"><label>75</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Goyal</surname><given-names>A</given-names> </name><name name-style="western"><surname>Sulaiman</surname><given-names>SA</given-names> </name><name 
name-style="western"><surname>Alaarag</surname><given-names>A</given-names> </name><etal/></person-group><article-title>Comparison of ChatGPT and DeepSeek large language models in the diagnosis of pericarditis</article-title><source>World J Cardiol</source><year>2025</year><month>08</month><day>26</day><volume>17</volume><issue>8</issue><fpage>110489</fpage><pub-id pub-id-type="doi">10.4330/wjc.v17.i8.110489</pub-id><pub-id pub-id-type="medline">40949931</pub-id></nlm-citation></ref><ref id="ref76"><label>76</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>He</surname><given-names>Q</given-names> </name><name name-style="western"><surname>Tan</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Niu</surname><given-names>W</given-names> </name><etal/></person-group><article-title>From algorithms to operating room: can large language models master China&#x2019;s attending anesthesiology exam? 
A cross-sectional evaluation</article-title><source>Int J Surg</source><year>2026</year><month>01</month><day>1</day><volume>112</volume><issue>1</issue><fpage>190</fpage><lpage>201</lpage><pub-id pub-id-type="doi">10.1097/JS9.0000000000003406</pub-id><pub-id pub-id-type="medline">40905848</pub-id></nlm-citation></ref><ref id="ref77"><label>77</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Karata&#x015F;</surname><given-names>G</given-names> </name><name name-style="western"><surname>Karata&#x015F;</surname><given-names>ME</given-names> </name></person-group><article-title>Artificial intelligence in pediatric ophthalmology: a comparative study of ChatGPT-4.0 and DeepSeek-R1 performance</article-title><source>Strabismus</source><year>2026</year><month>03</month><volume>34</volume><issue>1</issue><fpage>61</fpage><lpage>67</lpage><pub-id pub-id-type="doi">10.1080/09273972.2025.2536782</pub-id><pub-id pub-id-type="medline">40726359</pub-id></nlm-citation></ref><ref id="ref78"><label>78</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Smith</surname><given-names>A</given-names> </name><name name-style="western"><surname>Liebrenz</surname><given-names>M</given-names> </name><name name-style="western"><surname>Bhugra</surname><given-names>D</given-names> </name><name name-style="western"><surname>Grana</surname><given-names>J</given-names> </name><name name-style="western"><surname>Schleifer</surname><given-names>R</given-names> </name><name name-style="western"><surname>Buadze</surname><given-names>A</given-names> </name></person-group><article-title>Are clinical improvements in large language models a reality? 
Longitudinal comparisons of ChatGPT models and DeepSeek-R1 for psychiatric assessments and interventions</article-title><source>Int J Soc Psychiatry</source><year>2026</year><month>02</month><volume>72</volume><issue>1</issue><fpage>91</fpage><lpage>102</lpage><pub-id pub-id-type="doi">10.1177/00207640251358071</pub-id><pub-id pub-id-type="medline">40741928</pub-id></nlm-citation></ref><ref id="ref79"><label>79</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Harada</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Kawamura</surname><given-names>R</given-names> </name><name name-style="western"><surname>Yokose</surname><given-names>M</given-names> </name><name name-style="western"><surname>Singh</surname><given-names>H</given-names> </name><name name-style="western"><surname>Shimizu</surname><given-names>T</given-names> </name></person-group><article-title>Atypical presentations at risk for diagnostic errors in internal medicine: a scoping review</article-title><source>J Gen Intern Med</source><year>2026</year><month>05</month><volume>41</volume><issue>7</issue><fpage>1937</fpage><lpage>1956</lpage><pub-id pub-id-type="doi">10.1007/s11606-025-09901-z</pub-id><pub-id pub-id-type="medline">41085962</pub-id></nlm-citation></ref><ref id="ref80"><label>80</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Brohi</surname><given-names>S</given-names> </name><name name-style="western"><surname>Mastoi</surname><given-names>Q ul ain</given-names> </name><name name-style="western"><surname>Jhanjhi</surname><given-names>NZ</given-names> </name><name name-style="western"><surname>Pillai</surname><given-names>TR</given-names> </name></person-group><article-title>A research landscape of agentic AI and large language models: applications, challenges and future 
directions</article-title><source>Algorithms</source><year>2025</year><volume>18</volume><issue>8</issue><fpage>499</fpage><pub-id pub-id-type="doi">10.3390/a18080499</pub-id></nlm-citation></ref><ref id="ref81"><label>81</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Temsah</surname><given-names>A</given-names> </name><name name-style="western"><surname>Alhasan</surname><given-names>K</given-names> </name><name name-style="western"><surname>Altamimi</surname><given-names>I</given-names> </name><etal/></person-group><article-title>DeepSeek in healthcare: revealing opportunities and steering challenges of a new open-source artificial intelligence frontier</article-title><source>Cureus</source><year>2025</year><month>02</month><volume>17</volume><issue>2</issue><fpage>e79221</fpage><pub-id pub-id-type="doi">10.7759/cureus.79221</pub-id><pub-id pub-id-type="medline">39974299</pub-id></nlm-citation></ref><ref id="ref82"><label>82</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Liu</surname><given-names>Q</given-names> </name><name name-style="western"><surname>Xin</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Wu</surname><given-names>C</given-names> </name><etal/></person-group><article-title>Diagnostic value of combining ultrafast cine MRI and morphological measurements on gastroesophageal reflux disease</article-title><source>Abdom Radiol</source><year>2025</year><volume>50</volume><issue>10</issue><fpage>4495</fpage><lpage>4506</lpage><pub-id pub-id-type="doi">10.1007/s00261-025-04890-3</pub-id></nlm-citation></ref><ref id="ref83"><label>83</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Diniz-Freitas</surname><given-names>M</given-names> </name><name 
name-style="western"><surname>Diz-Dios</surname><given-names>P</given-names> </name></person-group><article-title>DeepSeek: another step forward in the diagnosis of oral lesions</article-title><source>J Dent Sci</source><year>2025</year><month>07</month><volume>20</volume><issue>3</issue><fpage>1904</fpage><lpage>1907</lpage><pub-id pub-id-type="doi">10.1016/j.jds.2025.02.023</pub-id><pub-id pub-id-type="medline">40654453</pub-id></nlm-citation></ref><ref id="ref84"><label>84</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>ElSayed</surname><given-names>A</given-names> </name><name name-style="western"><surname>Updegrove</surname><given-names>GF</given-names> </name></person-group><article-title>Limitations of broadly trained LLMs in interpreting orthopedic Walch glenoid classifications</article-title><source>Front Artif Intell</source><year>2025</year><volume>8</volume><fpage>1644093</fpage><pub-id pub-id-type="doi">10.3389/frai.2025.1644093</pub-id><pub-id pub-id-type="medline">40951327</pub-id></nlm-citation></ref><ref id="ref85"><label>85</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Beauchamp</surname><given-names>T</given-names> </name><name name-style="western"><surname>Childress</surname><given-names>J</given-names> </name></person-group><article-title>Principles of Biomedical Ethics: marking its fortieth anniversary</article-title><source>Am J Bioeth</source><year>2019</year><month>11</month><volume>19</volume><issue>11</issue><fpage>9</fpage><lpage>12</lpage><pub-id pub-id-type="doi">10.1080/15265161.2019.1665402</pub-id><pub-id pub-id-type="medline">31647760</pub-id></nlm-citation></ref><ref id="ref86"><label>86</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wang</surname><given-names>M</given-names> </name><name 
name-style="western"><surname>Shen</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Zhao</surname><given-names>B</given-names> </name><name name-style="western"><surname>Zhou</surname><given-names>X</given-names> </name><name name-style="western"><surname>Sun</surname><given-names>L</given-names> </name><name name-style="western"><surname>Liu</surname><given-names>X</given-names> </name></person-group><article-title>Enhancing LLM-based clinical reasoning in anesthesiology via graph-augmented retrieval and explainable generation</article-title><source>Health Inf Sci Syst</source><year>2025</year><month>12</month><volume>13</volume><issue>1</issue><fpage>62</fpage><pub-id pub-id-type="doi">10.1007/s13755-025-00379-x</pub-id><pub-id pub-id-type="medline">41041605</pub-id></nlm-citation></ref><ref id="ref87"><label>87</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Choudhury</surname><given-names>A</given-names> </name><name name-style="western"><surname>Shahsavar</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Shamszare</surname><given-names>H</given-names> </name></person-group><article-title>User intent to use DeepSeek for health care purposes and their trust in the large language model: multinational survey study</article-title><source>JMIR Hum Factors</source><year>2025</year><month>05</month><day>26</day><volume>12</volume><fpage>e72867</fpage><pub-id pub-id-type="doi">10.2196/72867</pub-id><pub-id pub-id-type="medline">40418796</pub-id></nlm-citation></ref><ref id="ref88"><label>88</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wang</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Zhou</surname><given-names>H</given-names> </name><name name-style="western"><surname>Song</surname><given-names>T</given-names> 
</name></person-group><article-title>A bibliometric analysis of large language model-based AI chatbots in surgery</article-title><source>Annals of Medicine &#x0026; Surgery</source><year>2025</year><volume>87</volume><issue>7</issue><fpage>4127</fpage><lpage>4138</lpage><pub-id pub-id-type="doi">10.1097/MS9.0000000000003234</pub-id></nlm-citation></ref><ref id="ref89"><label>89</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Mo&#x00EB;ll</surname><given-names>B</given-names> </name><name name-style="western"><surname>Sand Aronsson</surname><given-names>F</given-names> </name><name name-style="western"><surname>Akbar</surname><given-names>S</given-names> </name></person-group><article-title>Medical reasoning in LLMs: an in-depth analysis of DeepSeek R1</article-title><source>Front Artif Intell</source><year>2025</year><volume>8</volume><fpage>1616145</fpage><pub-id pub-id-type="doi">10.3389/frai.2025.1616145</pub-id><pub-id pub-id-type="medline">40607450</pub-id></nlm-citation></ref><ref id="ref90"><label>90</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Cao</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>J</given-names></name><name name-style="western"><surname>Li</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Zhong</surname><given-names>G</given-names> </name><name name-style="western"><surname>Song</surname><given-names>P</given-names></name></person-group><article-title>Expert consensus on the deployment of DeepSeek in medical institutions</article-title><source>Chinese Medical Ethics</source><year>2025</year><volume>38</volume><issue>5</issue><fpage>674</fpage><lpage>678</lpage><pub-id 
pub-id-type="doi">10.12026/j.issn.1001-8565.2025.05.19</pub-id></nlm-citation></ref><ref id="ref91"><label>91</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Si</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Meng</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>X</given-names> </name><etal/></person-group><article-title>Quality, safety and disparity of an AI chatbot in managing chronic diseases: simulated patient experiments</article-title><source>NPJ Digit Med</source><year>2025</year><month>09</month><day>25</day><volume>8</volume><issue>1</issue><fpage>574</fpage><pub-id pub-id-type="doi">10.1038/s41746-025-01956-w</pub-id><pub-id pub-id-type="medline">40999038</pub-id></nlm-citation></ref><ref id="ref92"><label>92</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Dong</surname><given-names>C</given-names> </name><name name-style="western"><surname>Qiu</surname><given-names>X</given-names> </name><name name-style="western"><surname>Deng</surname><given-names>J</given-names> </name><etal/></person-group><article-title>Comparative evaluation of large language models in delivering guideline-compliant recommendations for topical NSAID use in musculoskeletal pain: a multidimensional analysis</article-title><source>Clin Rheumatol</source><year>2025</year><month>11</month><volume>44</volume><issue>11</issue><fpage>4703</fpage><lpage>4710</lpage><pub-id pub-id-type="doi">10.1007/s10067-025-07640-4</pub-id><pub-id pub-id-type="medline">40952435</pub-id></nlm-citation></ref><ref id="ref93"><label>93</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Rowland</surname><given-names>SP</given-names> </name><name
name-style="western"><surname>Fitzgerald</surname><given-names>JE</given-names> </name><name name-style="western"><surname>Holme</surname><given-names>T</given-names> </name><name name-style="western"><surname>Powell</surname><given-names>J</given-names> </name><name name-style="western"><surname>McGregor</surname><given-names>A</given-names> </name></person-group><article-title>What is the clinical value of mHealth for patients?</article-title><source>NPJ Digit Med</source><year>2020</year><volume>3</volume><fpage>4</fpage><pub-id pub-id-type="doi">10.1038/s41746-019-0206-x</pub-id><pub-id pub-id-type="medline">31970289</pub-id></nlm-citation></ref><ref id="ref94"><label>94</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Watts</surname><given-names>E</given-names> </name><name name-style="western"><surname>Patel</surname><given-names>H</given-names> </name><name name-style="western"><surname>Kostov</surname><given-names>A</given-names> </name><name name-style="western"><surname>Kim</surname><given-names>J</given-names> </name><name name-style="western"><surname>Elkbuli</surname><given-names>A</given-names> </name></person-group><article-title>The role of compassionate care in medicine: toward improving patients&#x2019; quality of care and satisfaction</article-title><source>J Surg Res</source><year>2023</year><month>09</month><volume>289</volume><fpage>1</fpage><lpage>7</lpage><pub-id pub-id-type="doi">10.1016/j.jss.2023.03.024</pub-id><pub-id pub-id-type="medline">37068438</pub-id></nlm-citation></ref><ref id="ref95"><label>95</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Thomas</surname><given-names>RL</given-names> </name><name name-style="western"><surname>Uminsky</surname><given-names>D</given-names> </name></person-group><article-title>Reliance on metrics is a fundamental challenge for 
AI</article-title><source>Patterns (N Y)</source><year>2022</year><month>05</month><day>13</day><volume>3</volume><issue>5</issue><fpage>100476</fpage><pub-id pub-id-type="doi">10.1016/j.patter.2022.100476</pub-id><pub-id pub-id-type="medline">35607624</pub-id></nlm-citation></ref><ref id="ref96"><label>96</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Su</surname><given-names>H</given-names> </name><name name-style="western"><surname>Sun</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Li</surname><given-names>R</given-names> </name><etal/></person-group><article-title>Large language models in medical diagnostics: scoping review with bibliometric analysis</article-title><source>J Med Internet Res</source><year>2025</year><month>06</month><day>9</day><volume>27</volume><fpage>e72062</fpage><pub-id pub-id-type="doi">10.2196/72062</pub-id><pub-id pub-id-type="medline">40489764</pub-id></nlm-citation></ref><ref id="ref97"><label>97</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zhou</surname><given-names>H</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>R</given-names> </name><etal/></person-group><article-title>DeepSeek versus GPT: evaluation of large language model chatbots&#x2019; responses on orofacial clefts</article-title><source>J Craniofac Surg</source><year>2025</year><month>09</month><day>1</day><volume>36</volume><issue>6</issue><fpage>2197</fpage><lpage>2201</lpage><pub-id pub-id-type="doi">10.1097/SCS.0000000000011399</pub-id><pub-id pub-id-type="medline">40245329</pub-id></nlm-citation></ref><ref id="ref98"><label>98</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Zhou</surname><given-names>J</given-names> </name><name name-style="western"><surname>Cheng</surname><given-names>Y</given-names> </name><name name-style="western"><surname>He</surname><given-names>S</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>H</given-names> </name></person-group><article-title>Large language models for transforming healthcare: a perspective on DeepSeek&#x2010;R1</article-title><source>MedComm &#x2013; Future Medicine</source><year>2025</year><month>06</month><volume>4</volume><issue>2</issue><fpage>e70021</fpage><comment><ext-link ext-link-type="uri" xlink:href="https://onlinelibrary.wiley.com/toc/27696456/4/2">https://onlinelibrary.wiley.com/toc/27696456/4/2</ext-link></comment><pub-id pub-id-type="doi">10.1002/mef2.70021</pub-id></nlm-citation></ref><ref id="ref99"><label>99</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wang</surname><given-names>Q</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>H</given-names> </name><etal/></person-group><article-title>Large language models could be applied in personalized out-of-hospital management for breast cancer: a prospective randomized single blind study</article-title><source>Sci Rep</source><year>2025</year><month>09</month><day>29</day><volume>15</volume><issue>1</issue><fpage>33589</fpage><pub-id pub-id-type="doi">10.1038/s41598-025-18759-4</pub-id></nlm-citation></ref><ref id="ref100"><label>100</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sahni</surname><given-names>NR</given-names> </name><name name-style="western"><surname>Carrus</surname><given-names>B</given-names> 
</name></person-group><article-title>Artificial intelligence in U.S. health care delivery</article-title><source>N Engl J Med</source><year>2023</year><month>10</month><day>12</day><volume>389</volume><issue>15</issue><fpage>1442</fpage><lpage>1443</lpage><pub-id pub-id-type="doi">10.1056/NEJMc2310288</pub-id></nlm-citation></ref><ref id="ref101"><label>101</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Finkenberg</surname><given-names>J</given-names> </name></person-group><article-title>NASS 2023 presidential address: artificial intelligence and its effect on the art of medicine and the physician-patient relationship</article-title><source>Spine J</source><year>2024</year><month>02</month><volume>24</volume><issue>2</issue><fpage>191</fpage><lpage>194</lpage><pub-id pub-id-type="doi">10.1016/j.spinee.2023.11.001</pub-id><pub-id pub-id-type="medline">37944759</pub-id></nlm-citation></ref><ref id="ref102"><label>102</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hui</surname><given-names>L</given-names> </name><name name-style="western"><surname>Khosa</surname><given-names>F</given-names> </name></person-group><article-title>Artificial intelligence in action: racial and gender disparities in academic radiology</article-title><source>Cureus</source><year>2025</year><month>09</month><volume>17</volume><issue>9</issue><fpage>e92382</fpage><pub-id pub-id-type="doi">10.7759/cureus.92382</pub-id><pub-id pub-id-type="medline">41103889</pub-id></nlm-citation></ref><ref id="ref103"><label>103</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Dai</surname><given-names>X</given-names> </name><name name-style="western"><surname>Xi</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Hu</surname><given-names>Y</given-names> 
</name><etal/></person-group><article-title>LLM evaluation for thyroid nodule assessment: comparing ACR-TIRADS, C-TIRADS, and clinician-AI trust gap</article-title><source>Front Endocrinol</source><year>2025</year><volume>16</volume><fpage>1667809</fpage><pub-id pub-id-type="doi">10.3389/fendo.2025.1667809</pub-id></nlm-citation></ref><ref id="ref104"><label>104</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wang</surname><given-names>G</given-names> </name><name name-style="western"><surname>Meng</surname><given-names>X</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>F</given-names> </name></person-group><article-title>Past, present, and future of global research on artificial intelligence applications in dermatology: a bibliometric analysis</article-title><source>Medicine (Baltimore)</source><year>2023</year><volume>102</volume><issue>45</issue><fpage>e35993</fpage><pub-id pub-id-type="doi">10.1097/MD.0000000000035993</pub-id></nlm-citation></ref><ref id="ref105"><label>105</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Huang</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Yang</surname><given-names>G</given-names> </name><name name-style="western"><surname>Shen</surname><given-names>Y</given-names> </name><etal/></person-group><article-title>Application of large language models in complex clinical cases: cross-sectional evaluation study</article-title><source>JMIR Med Inform</source><year>2025</year><month>08</month><day>14</day><volume>13</volume><fpage>e73941</fpage><pub-id pub-id-type="doi">10.2196/73941</pub-id><pub-id pub-id-type="medline">41055081</pub-id></nlm-citation></ref><ref id="ref106"><label>106</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Sallam</surname><given-names>M</given-names> </name><name name-style="western"><surname>Alasfoor</surname><given-names>IM</given-names> </name><name name-style="western"><surname>Khalid</surname><given-names>SW</given-names> </name><etal/></person-group><article-title>Chinese generative AI models (DeepSeek and Qwen) rival ChatGPT-4 in ophthalmology queries with excellent performance in Arabic and English</article-title><source>Narra J</source><year>2025</year><month>04</month><volume>5</volume><issue>1</issue><fpage>e2371</fpage><pub-id pub-id-type="doi">10.52225/narra.v5i1.2371</pub-id><pub-id pub-id-type="medline">40352182</pub-id></nlm-citation></ref><ref id="ref107"><label>107</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lim</surname><given-names>ECN</given-names> </name><name name-style="western"><surname>Cheng</surname><given-names>NCL</given-names> </name><name name-style="western"><surname>Lim</surname><given-names>CED</given-names> </name></person-group><article-title>The art of medical synthesis: where Chinese medical wisdom intersects with artificial intelligence</article-title><source>Journal of Traditional Chinese Medical Sciences</source><year>2026</year><month>01</month><volume>13</volume><issue>1</issue><fpage>51</fpage><lpage>59</lpage><pub-id pub-id-type="doi">10.1016/j.jtcms.2025.08.001</pub-id></nlm-citation></ref><ref id="ref108"><label>108</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Patil</surname><given-names>NG</given-names> </name><name name-style="western"><surname>Kou</surname><given-names>NL</given-names> </name><name name-style="western"><surname>Baptista&#x2010;Hon</surname><given-names>DT</given-names> </name><name name-style="western"><surname>Monteiro</surname><given-names>O</given-names> </name></person-group><article-title>Artificial intelligence in 
medical education: a practical guide for educators</article-title><source>MedComm &#x2013; Future Medicine</source><year>2025</year><month>06</month><volume>4</volume><issue>2</issue><fpage>e70018</fpage><comment><ext-link ext-link-type="uri" xlink:href="https://onlinelibrary.wiley.com/toc/27696456/4/2">https://onlinelibrary.wiley.com/toc/27696456/4/2</ext-link></comment><pub-id pub-id-type="doi">10.1002/mef2.70018</pub-id></nlm-citation></ref><ref id="ref109"><label>109</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Yan</surname><given-names>W</given-names> </name><name name-style="western"><surname>Liu</surname><given-names>J</given-names> </name><name name-style="western"><surname>Liang</surname><given-names>W</given-names> </name></person-group><article-title>DeepSeek empowers general medicine: potential application and prospect</article-title><source>Chinese General Practice</source><year>2025</year><month>06</month><volume>28</volume><issue>17</issue><fpage>2065</fpage><lpage>2069</lpage><pub-id pub-id-type="doi">10.12114/j.issn.1007-9572.2025.0023</pub-id></nlm-citation></ref><ref id="ref110"><label>110</label><nlm-citation citation-type="web"><source>Editage</source><access-date>2026-06-07</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.editage.com/">https://www.editage.com/</ext-link></comment></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>Search strategy.</p><media xlink:href="jmir_v28i1e93354_app1.doc" xlink:title="DOC File, 48 KB"/></supplementary-material><supplementary-material id="app2"><label>Multimedia Appendix 2</label><p>Quality assessment criteria for studies included in the scoping review.</p><media xlink:href="jmir_v28i1e93354_app2.doc" xlink:title="DOC File, 33 KB"/></supplementary-material><supplementary-material id="app3"><label>Multimedia Appendix 3</label><p>The
extracted data for scoping review.</p><media xlink:href="jmir_v28i1e93354_app3.xlsx" xlink:title="XLSX File, 246 KB"/></supplementary-material><supplementary-material id="app4"><label>Checklist 1</label><p>PRISMA-ScR (Preferred Reporting Items for Systematic Reviews and Meta-Analyses Extension for Scoping Reviews) checklist.</p><media xlink:href="jmir_v28i1e93354_app4.docx" xlink:title="DOCX File, 73 KB"/></supplementary-material></app-group></back></article>