<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="review-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id><journal-id journal-id-type="publisher-id">jmir</journal-id><journal-id journal-id-type="index">1</journal-id><journal-title>Journal of Medical Internet Research</journal-title><abbrev-journal-title>J Med Internet Res</abbrev-journal-title><issn pub-type="epub">1438-8871</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v28i1e91659</article-id><article-id pub-id-type="doi">10.2196/91659</article-id><article-categories><subj-group subj-group-type="heading"><subject>Review</subject></subj-group></article-categories><title-group><article-title>Deep Learning Algorithms Versus Radiologists in Digital Breast Tomosynthesis for Breast Cancer Detection: Systematic Review and Meta-Analysis</article-title></title-group><contrib-group><contrib contrib-type="author"><name name-style="western"><surname>Lyu</surname><given-names>Shewen</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1"/></contrib><contrib contrib-type="author"><name name-style="western"><surname>Wang</surname><given-names>Zepeng</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1"/></contrib><contrib contrib-type="author"><name name-style="western"><surname>Mu</surname><given-names>Yujing</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1"/></contrib><contrib contrib-type="author"><name name-style="western"><surname>Wang</surname><given-names>Luyao</given-names></name><degrees>PhD</degrees><xref ref-type="aff" 
rid="aff1"/></contrib><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Pei</surname><given-names>Xiaohua</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff1"/></contrib></contrib-group><aff id="aff1"><institution>Beijing University of Chinese Medicine Third Affiliated Hospital</institution><addr-line>51 Xiaoguan Street, Andingmenwai, Chaoyang District</addr-line><addr-line>Beijing</addr-line><country>China</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Balcarras</surname><given-names>Matthew</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Sheida</surname><given-names>Fateme</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Yu</surname><given-names>Zekai</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Xiaohua Pei, MD, Beijing University of Chinese Medicine Third Affiliated Hospital, 51 Xiaoguan Street, Andingmenwai, Chaoyang District, Beijing, 100029, China, 86 13911683278; <email>hl0002@bucm.edu.cn</email></corresp></author-notes><pub-date pub-type="collection"><year>2026</year></pub-date><pub-date pub-type="epub"><day>6</day><month>5</month><year>2026</year></pub-date><volume>28</volume><elocation-id>e91659</elocation-id><history><date date-type="received"><day>18</day><month>01</month><year>2026</year></date><date date-type="rev-recd"><day>26</day><month>03</month><year>2026</year></date><date date-type="accepted"><day>26</day><month>03</month><year>2026</year></date></history><copyright-statement>&#x00A9; Shewen Lyu, Zepeng Wang, Yujing Mu, Luyao Wang, Xiaohua Pei. Originally published in the Journal of Medical Internet Research (<ext-link ext-link-type="uri" xlink:href="https://www.jmir.org">https://www.jmir.org</ext-link>), 6.5.2026. 
</copyright-statement><copyright-year>2026</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research (ISSN 1438-8871), is properly cited. The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://www.jmir.org/">https://www.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://www.jmir.org/2026/1/e91659"/><abstract><sec><title>Background</title><p>Deep learning (DL) algorithms for digital breast tomosynthesis (DBT) have proliferated, demonstrating emerging potential in enhancing lesion detection and classification.</p></sec><sec><title>Objective</title><p>This study aimed to compare the diagnostic performance of DL algorithms for DBT with that of radiologists of varying experience and assess the clinical impact of DL assistance.</p></sec><sec sec-type="methods"><title>Methods</title><p>A systematic search of PubMed, Embase, Web of Science, and the Cochrane Library was conducted up to November 8, 2025. Included studies compared the performance of stand-alone DL algorithms for DBT, radiologist interpretation alone, and DL-assisted diagnosis. Study quality was assessed using the Prediction Model Risk of Bias Assessment Tool+Artificial Intelligence (PROBAST+AI). 
Performance metrics were pooled using bivariate random effects and generalized linear mixed models.</p></sec><sec sec-type="results"><title>Results</title><p>A total of 13 studies with 38,565 patients were included in the final analysis. Stand-alone DL algorithms achieved a pooled sensitivity of 0.88 (95% CI 0.80-0.93), specificity of 0.74 (95% CI 0.59-0.85), and area under the receiver operating characteristic curve (AUC) of 0.89 (95% CI 0.86-0.92). While DL performance showed no statistically significant difference compared to all radiologists (AUC=0.89 vs 0.88; <italic>P</italic>=.64) or senior radiologists (AUC=0.89 vs 0.90; <italic>P</italic>=.62), DL demonstrated significantly superior sensitivity compared to junior radiologists (0.88 vs 0.76; <italic>P</italic>=.03). Notably, DL assistance did not statistically improve diagnostic metrics for radiologists across any experience level. Meta-regression identified validation methods as a significant source of heterogeneity.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>DL algorithms for DBT exhibited strong diagnostic proficiency and showed higher sensitivity than junior radiologists, suggesting their potential utility as adjunctive tools to help reduce oversight in less experienced settings. However, given that DL assistance did not significantly elevate overall human performance, current models act primarily as supplementary aids rather than definitive clinical tools. 
Future prospective multimodal studies are warranted to validate these findings and optimize clinical integration.</p></sec><sec><title>Trial Registration</title><p>PROSPERO CRD420251242858; https://www.crd.york.ac.uk/PROSPERO/view/CRD420251242858</p></sec></abstract><kwd-group><kwd>digital breast tomosynthesis</kwd><kwd>deep learning</kwd><kwd>breast neoplasms</kwd><kwd>diagnostic accuracy</kwd><kwd>meta-analysis</kwd><kwd>artificial intelligence</kwd><kwd>AI</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>Breast cancer is the most commonly diagnosed cancer and the leading cause of cancer deaths among women worldwide, with an estimated 2.3 million new cases and 666,000 deaths occurring worldwide in 2022. This profound global burden underscores the urgency for early and accurate detection to improve prognosis and reduce the burden of invasive treatments [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref2">2</xref>]. While imaging serves as the cornerstone of screening, the transition from digital mammography to digital breast tomosynthesis (DBT) has revolutionized clinical practice. By providing quasi-3D volumetric data, DBT significantly alleviates the issue of tissue superposition, thereby increasing cancer detection rates and reducing unnecessary recall rates [<xref ref-type="bibr" rid="ref3">3</xref>].</p><p>Despite these advantages, DBT introduces new challenges. The substantial increase in image volume significantly prolongs interpretation time, contributing to radiologist fatigue and potential cognitive overload [<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref5">5</xref>]. 
Furthermore, despite overall improvements in detection rates, challenges regarding false positives (FPs) and false negatives (FNs) persist, with specific subtypes such as invasive lobular carcinoma prone to being overlooked [<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref6">6</xref>]. The interpretation of DBT imagery is also heavily dependent on the radiologist&#x2019;s experience, which increases subjectivity and the risk of misdiagnosis [<xref ref-type="bibr" rid="ref7">7</xref>].</p><p>To address these diagnostic bottlenecks, deep learning (DL) algorithms for DBT have been increasingly developed, demonstrating emerging potential in enhancing lesion detection and classification. These algorithms aim to assist radiologists by extracting complex feature representations that may be imperceptible to the human eye [<xref ref-type="bibr" rid="ref7">7</xref>]. However, results across the literature are inconsistent. While some pivotal studies suggest DL superiority [<xref ref-type="bibr" rid="ref8">8</xref>], others indicate that algorithms may struggle with FPs or lack generalizability across different vendors and populations [<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref10">10</xref>]. Moreover, the comparative performance of DL against radiologists of varying expertise (eg, junior and senior radiologists) remains a subject of ongoing debate [<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref10">10</xref>]. This review includes data from 38,565 patients across the included studies.</p><p>Given the rapid accumulation of new evidence and the heterogeneity of study designs, a rigorous synthesis of current data is warranted. 
Therefore, the purpose of this systematic review and meta-analysis was to comprehensively compare the relative diagnostic performance and added value of DL algorithms vs radiologists of varying experience levels and evaluate potential factors influencing the diagnostic performance of these algorithms.</p></sec><sec id="s2" sec-type="methods"><title>Methods</title><p>The meta-analysis was carried out in full compliance with the PRISMA-DTA (Preferred Reporting Items for Systematic Reviews and Meta-Analyses extension for Diagnostic Test Accuracy) guidelines [<xref ref-type="bibr" rid="ref11">11</xref>]. Furthermore, the protocol for this study was registered with PROSPERO (CRD420251242858).</p><sec id="s2-1"><title>Ethical Considerations</title><p>This was a systematic review and meta-analysis, so ethics approval and consent to participate are not applicable.</p></sec><sec id="s2-2"><title>Search Strategy</title><p>A comprehensive literature search was conducted up to November 8, 2025, across 4 electronic databases: PubMed, Embase, Web of Science, and Cochrane Library. The search strategy, designed and executed to maximize sensitivity, used a combination of free-text terms and controlled vocabulary (eg, MeSH [Medical Subject Headings] terms in PubMed). Key concepts included three domains: (1) artificial intelligence (AI; eg, &#x201C;Deep Learning&#x201D;), (2) the target disease (eg, &#x201C;Breast Neoplasms&#x201D;), and (3) the imaging modality (eg, &#x201C;Digital Breast Tomosynthesis&#x201D;). No restrictions were placed on language, publication date, or study type. Two independent reviewers (SL and YM) performed initial title and abstract screening followed by full-text assessment of potentially eligible studies. To ensure literature saturation, the reference lists of all included articles were manually screened. 
The full, detailed search syntax for each database is provided in Table S1 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p></sec><sec id="s2-3"><title>Inclusion and Exclusion Criteria</title><p>Studies were selected according to the participants, index test, target condition, reference standard, outcomes, and setting framework: women undergoing breast cancer screening via DBT (participants); evaluated the performance of stand-alone DL algorithms for DBT, independent radiologist interpretation, or DL-assisted radiologist diagnosis (index test); breast cancer, confirmed via histopathology for positive cases (target condition); final diagnosis based on histopathology (for positive cases) or clinical imaging follow-up (for negative cases; reference standard); primary outcomes, including diagnostic performance measures (sensitivity, specificity, and area under the receiver operating characteristic [ROC] curve [AUC]), and secondary outcomes, comprising clinical impact metrics (detection rate, positive predictive value [PPV], and recall rate; outcomes); and original studies using retrospective or prospective cohorts from screening programs or clinical databases (setting).</p><p>In addition, we systematically excluded studies whose titles and abstracts were clearly irrelevant, as well as noneligible publication types, including reviews, case reports, conference abstracts, meta-analyses, and letters to the editor. Furthermore, studies that did not involve DL for DBT; without algorithm comparison; and with true positive (TP), FP, true negative (TN), and FN data not available were also excluded. 
The screening process was conducted in duplicate by 2 independent reviewers (SL and ZW), with any disagreements resolved through consultation with a third reviewer (XP).</p></sec><sec id="s2-4"><title>Quality Assessment and Certainty of Evidence</title><p>We used the updated Prediction Model Risk of Bias Assessment Tool+AI (PROBAST+AI) quality assessment tool [<xref ref-type="bibr" rid="ref12">12</xref>], which replaces the original 2019 Prediction Model Risk of Bias Assessment Tool instrument. This tool adopts a 2-phase structure comprising model development and model evaluation. Each phase includes 7 domains addressing participants, data sources, predictors, outcome assessment, and analytical approaches. For each domain, the risk-of-bias judgment is categorized as low, high, or unclear based on responses to predefined signaling questions. The full set of signaling questions and evaluation templates can be found in Tables S2 and S3 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>. To ensure objectivity and accuracy in the evaluation process, 2 reviewers (SL and LW) independently assessed the risk of bias in the included studies. The certainty of evidence was evaluated using the Grading of Recommendations Assessment, Development, and Evaluation (GRADE) framework [<xref ref-type="bibr" rid="ref13">13</xref>]. Detailed evaluation items, decision rules, and domain-specific judgments can be found in Table S4 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p></sec><sec id="s2-5"><title>Data Extraction</title><p>Two reviewers (SL and YM) independently extracted data from the included full-text articles, and disagreements were resolved through consultation with a third reviewer (XP). Extracted items included patient characteristics, details of DL methods, DBT imaging parameters, stand-alone DL performance, radiologist performance, and radiologist performance with DL assistance. 
Because most studies did not report full contingency tables, we used two strategies to derive TP, FP, FN, and TN values: (1) back calculation using reported sensitivity, specificity, number of positive cases, and total sample size; and (2) redigitizing ROC curves using the GetData software to obtain optimal sensitivity and specificity based on the Youden index. For DL algorithms, only validation set results were collected. When multiple DL models or radiologists were reported with overlapping cohorts, only the best-performing result (highest AUC) was extracted to avoid duplication.</p></sec><sec id="s2-6"><title>Outcome Measures</title><p>The primary outcome measures included the sensitivity, specificity, and AUC of DL algorithms, radiologists, and radiologists assisted by DL, as well as detection rate, PPV, and recall rate. Secondary outcomes focused on diagnostic performance stratified by radiologist experience both with and without DL assistance. Sensitivity (TP rate) reflected the ability to correctly identify cancer cases and was calculated as TP/(TP + FN) &#x00D7; 100%. Specificity (TN rate) represented the ability to correctly identify noncancer cases, calculated as TN/(TN + FP) &#x00D7; 100%. AUC summarizes overall discriminative ability. Detection rate was defined as TP/N &#x00D7; 100%, representing the proportion of cancers correctly detected in the screening population. PPV was calculated as TP/(TP + FP) &#x00D7; 100%, and recall rate was calculated as (TP + FP)/N &#x00D7; 100%. 
Radiologists with less than 5 years of experience were classified as junior, those with 5 or more years of experience were classified as senior, and studies without explicit experience data were categorized as unspecified.</p></sec><sec id="s2-7"><title>Statistical Analysis</title><p>Given the expected methodological heterogeneity across studies, we applied a bivariate random-effects model [<xref ref-type="bibr" rid="ref14">14</xref>] to pool sensitivity, specificity, and AUC estimates. For PPV, recall rate, and detection rate, log-transformation was performed prior to synthesis using a random-effects generalized linear mixed model framework. Differences in pooled diagnostic performance were assessed using a mean <italic>Z</italic>-test, with statistical significance defined as a <italic>P</italic> value of less than .05. Heterogeneity was quantified using the Higgins <italic>I</italic><sup>2</sup> statistic [<xref ref-type="bibr" rid="ref15">15</xref>]. For substantial heterogeneity (<italic>I</italic><sup>2</sup>&#x003E;50%), bivariate box plots were used to explore potential sources, and multivariable meta-regression was conducted for DL algorithms to evaluate the impact of validation strategy, study design, region of interest, and data splitting method. Temporal changes in DL performance were examined using bubble plots, whereas violin plots were used to visualize differences in radiologist performance before and after DL assistance. Fagan nomograms were generated to assess the clinical implications for patients. The assessment of publication bias was conducted using the Deeks funnel plot [<xref ref-type="bibr" rid="ref16">16</xref>]. 
Analyses were executed using Stata (version 15.1; StataCorp) with the <italic>midas</italic> and <italic>metadta</italic> commands, as well as R (version 4.5.1; R Foundation for Statistical Computing) using the <italic>ggplot2</italic> and <italic>tidyverse</italic> packages.</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title>Literature Search and Study Selection</title><p>A total of 1076 potentially relevant records were identified through the initial database search. Of these 1076 records, after removing 422 (39.2%) duplicates, 654 (60.8%) proceeded to title and abstract screening. During this stage, of the 654 remaining articles, 634 (96.9%) were excluded due to clear irrelevance or noneligible publication types. The remaining 20 articles were assessed in full text. Following detailed evaluation, of these 20 articles, 2 (10%) [<xref ref-type="bibr" rid="ref17">17</xref>,<xref ref-type="bibr" rid="ref18">18</xref>] were excluded because the data required to construct contingency tables (TP, FP, TN, and FN) were unavailable, 4 (20%) [<xref ref-type="bibr" rid="ref19">19</xref>-<xref ref-type="bibr" rid="ref22">22</xref>] were excluded because they were not research on DL for DBT, and 3 (15%) [<xref ref-type="bibr" rid="ref23">23</xref>-<xref ref-type="bibr" rid="ref25">25</xref>] lacked direct algorithm comparisons. In addition, 2 eligible records were identified from nondatabase sources. Ultimately, 13 studies met all the inclusion criteria and were incorporated into the meta-analysis [<xref ref-type="bibr" rid="ref8">8</xref>-<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref26">26</xref>-<xref ref-type="bibr" rid="ref35">35</xref>]. 
The study selection process followed the PRISMA (Preferred Reporting Items for Systematic Reviews and Meta-Analyses) guidelines and is illustrated in <xref ref-type="fig" rid="figure1">Figure 1</xref>.</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>PRISMA (Preferred Reporting Items for Systematic Reviews and Meta-Analyses) flow diagram illustrating the study selection process. DBT: digital breast tomosynthesis; DL: deep learning; FN: false negative; FP: false positive; TN: true negative; TP: true positive.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="jmir_v28i1e91659_fig01.png"/></fig></sec><sec id="s3-2"><title>Study Description and Quality Assessment</title><p>A total of 13 studies met the eligibility criteria. Among these, 46.2% (n=6) [<xref ref-type="bibr" rid="ref8">8</xref>,<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref28">28</xref>,<xref ref-type="bibr" rid="ref31">31</xref>,<xref ref-type="bibr" rid="ref32">32</xref>,<xref ref-type="bibr" rid="ref35">35</xref>] reported internal validation cohorts (n=25,885 patients), and 61.5% (n=8) [<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref26">26</xref>,<xref ref-type="bibr" rid="ref27">27</xref>,<xref ref-type="bibr" rid="ref29">29</xref>,<xref ref-type="bibr" rid="ref30">30</xref>,<xref ref-type="bibr" rid="ref33">33</xref>,<xref ref-type="bibr" rid="ref34">34</xref>] reported external validation cohorts (n=12,680 patients; n=1, 7.7% of the studies reported both). 
In total, 61.5% (n=8) of the studies [<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref26">26</xref>,<xref ref-type="bibr" rid="ref27">27</xref>,<xref ref-type="bibr" rid="ref29">29</xref>,<xref ref-type="bibr" rid="ref30">30</xref>,<xref ref-type="bibr" rid="ref33">33</xref>,<xref ref-type="bibr" rid="ref34">34</xref>] evaluated stand-alone DL algorithms, and 84.6% (n=11) [<xref ref-type="bibr" rid="ref8">8</xref>-<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref26">26</xref>,<xref ref-type="bibr" rid="ref28">28</xref>-<xref ref-type="bibr" rid="ref33">33</xref>,<xref ref-type="bibr" rid="ref35">35</xref>] assessed changes in radiologist performance before and after DL assistance. All studies (n=13, 100%) [<xref ref-type="bibr" rid="ref8">8</xref>-<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref26">26</xref>-<xref ref-type="bibr" rid="ref35">35</xref>] reported outcomes for the overall radiologist group, whereas 15.4% (n=2) [<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref35">35</xref>] included junior radiologists, 30.8% (n=4) [<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref27">27</xref>,<xref ref-type="bibr" rid="ref31">31</xref>,<xref ref-type="bibr" rid="ref35">35</xref>] included senior radiologists, and 69.2% (n=9) [<xref ref-type="bibr" rid="ref8">8</xref>,<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref26">26</xref>,<xref ref-type="bibr" rid="ref28">28</xref>-<xref ref-type="bibr" rid="ref30">30</xref>,<xref ref-type="bibr" rid="ref32">32</xref>-<xref ref-type="bibr" rid="ref34">34</xref>] involved radiologists with unspecified experience levels. The included studies were published between 2017 and 2025. 
A total of 92.3% (n=12) [<xref ref-type="bibr" rid="ref8">8</xref>-<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref27">27</xref>-<xref ref-type="bibr" rid="ref35">35</xref>] of the studies were retrospective in design, and 7.7% (n=1) [<xref ref-type="bibr" rid="ref26">26</xref>] were prospective; all used pathological biopsy as the reference standard. Detailed characteristics are summarized in Tables S5 to S11 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p><p>The quality assessment using the PROBAST+AI tool is shown in <xref ref-type="fig" rid="figure2">Figure 2</xref> and Tables S2 and S3 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>. For model development, no study was rated as high risk in terms of either overall risk of bias or applicability concerns. For model evaluation, 50% (4/8) of the studies were judged to have high overall risk of bias, whereas none showed high applicability concerns. Overall, although the risk of bias in the evaluation phase was notable, the applicability of the included studies was generally acceptable. 
According to the GRADE framework, the certainty of the evidence ranged from low to moderate, primarily downgraded due to risk of bias and imprecision, as detailed in Table S4 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>Risk of bias and applicability concerns of the included studies using the Prediction Model Risk of Bias Assessment Tool+Artificial Intelligence (PROBAST+AI) tool: (A) summary of PROBAST+AI assessment for model development and (B) summary of PROBAST+AI assessment for model evaluation.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="jmir_v28i1e91659_fig02.png"/></fig></sec><sec id="s3-3"><title>DL Algorithms</title><p>The sensitivity of DL algorithms was found to be 0.88 (95% CI 0.80-0.93; <italic>I</italic><sup>2</sup>=96.22%; low certainty), and the specificity was 0.74 (95% CI 0.59-0.85; <italic>I</italic><sup>2</sup>=99.63%; low certainty). The AUC was 0.89 (95% CI 0.86-0.92; low certainty), whereas the detection rate was 0.14 (95% CI 0.06-0.29; <italic>I</italic><sup>2</sup>=98.8%; low certainty). The PPV was 0.41 (95% CI 0.18-0.70; <italic>I</italic><sup>2</sup>=99.1%; low certainty), and the recall rate was 0.39 (95% CI 0.27-0.53; <italic>I</italic><sup>2</sup>=99.7%; low certainty). 
As shown in <xref ref-type="table" rid="table1">Tables 1</xref> and <xref ref-type="table" rid="table2">2</xref> and Figures S1 to S5 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>, the bubble chart indicates that the AUC values remained relatively stable from 2021 to 2024, as shown in <xref ref-type="fig" rid="figure3">Figure 3A</xref>.</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Deep learning (DL) algorithms for digital breast tomosynthesis (DBT) vs radiologists of different experience levels in terms of diagnostic performance outcomes for breast cancer diagnosis.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Subgroup</td><td align="left" valign="bottom">Sample size, n</td><td align="left" valign="bottom">Validation datasets, n</td><td align="left" valign="bottom">Sensitivity (95% CI)</td><td align="left" valign="bottom" colspan="2">Difference in sensitivity<sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup></td><td align="left" valign="bottom">Specificity (95% CI)</td><td align="left" valign="bottom" colspan="2">Difference in specificity<sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup></td><td align="left" valign="bottom">AUC<sup><xref ref-type="table-fn" rid="table1fn2">b</xref></sup> (95% CI)</td><td align="left" valign="bottom" colspan="2">Difference in AUC<sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup></td></tr></thead><tbody><tr><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"><italic>Z</italic>-score</td><td align="left" valign="top"><italic>P</italic> value</td><td align="left" valign="top"/><td align="left" valign="top"><italic>Z</italic>-score</td><td align="left" valign="top"><italic>P</italic> value</td><td align="left" valign="top"/><td align="left" 
valign="top"><italic>Z</italic>-score</td><td align="left" valign="top"><italic>P</italic> value</td></tr><tr><td align="left" valign="top">Overall DL for DBT</td><td align="left" valign="top">12,555</td><td align="left" valign="top">8</td><td align="left" valign="top">0.88 (0.80-0.93)</td><td align="left" valign="top">&#x2014;<sup><xref ref-type="table-fn" rid="table1fn3">c</xref></sup></td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">0.74 (0.59-0.85)</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">0.89 (0.86-0.92)</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td></tr><tr><td align="left" valign="top">DL for DBT internal validation</td><td align="left" valign="top">5182</td><td align="left" valign="top">1</td><td align="left" valign="top">0.96 (0.90-1.00)</td><td align="left" valign="top">2.63</td><td align="char" char="." valign="top">.009</td><td align="left" valign="top">0.40 (0.03-0.76)</td><td align="left" valign="top">1.95</td><td align="char" char="." valign="top">.05</td><td align="left" valign="top">0.84 (0.78-0.89)</td><td align="left" valign="top">1.62</td><td align="char" char="." valign="top">.11</td></tr><tr><td align="left" valign="top">DL for DBT external validation</td><td align="left" valign="top">7373</td><td align="left" valign="top">7</td><td align="left" valign="top">0.85 (0.77-0.90)</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">0.78 (0.65-0.87)</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">0.89 (0.86-0.91)</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td></tr><tr><td align="left" valign="top" colspan="4">DL for DBT vs all radiologists</td><td align="left" valign="top">1.33</td><td align="char" char="." 
valign="top">.18</td><td align="left" valign="top">&#x2003;</td><td align="left" valign="top">1.51</td><td align="char" char="." valign="top">.13</td><td align="left" valign="top">&#x2003;</td><td align="left" valign="top">0.46</td><td align="char" char="." valign="top">.64</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>DL for DBT</td><td align="left" valign="top">12,555</td><td align="left" valign="top">8</td><td align="left" valign="top">0.88 (0.80-0.93)</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">0.74 (0.59-0.85)</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">0.89 (0.86-0.92)</td><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>All radiologists</td><td align="left" valign="top">36,245</td><td align="left" valign="top">14</td><td align="left" valign="top">0.83 (0.79-0.86)</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">0.86 (0.75-0.92)</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">0.88 (0.85-0.91)</td><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top" colspan="4">DL for DBT vs junior radiologists</td><td align="left" valign="top">2.12</td><td align="char" char="." valign="top">.03</td><td align="left" valign="top">&#x2003;</td><td align="left" valign="top">1.35</td><td align="char" char="." 
valign="top">.18</td><td align="left" valign="top">&#x2003;</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>DL for DBT</td><td align="left" valign="top">12,555</td><td align="left" valign="top">8</td><td align="left" valign="top">0.88 (0.80-0.93)</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">0.74 (0.59-0.85)</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">0.89 (0.86-0.92)</td><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Junior radiologists</td><td align="left" valign="top">310</td><td align="left" valign="top">2</td><td align="left" valign="top">0.76 (0.66-0.84)</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">0.94 (0.48-1.00)</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">&#x2014;</td><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top" colspan="4">DL for DBT vs senior radiologists</td><td align="left" valign="top">0.50</td><td align="char" char="." valign="top">.62</td><td align="left" valign="top">&#x2003;</td><td align="left" valign="top">1.28</td><td align="char" char="." valign="top">.20</td><td align="left" valign="top">&#x2003;</td><td align="left" valign="top">0.50</td><td align="char" char="." 
valign="top">.62</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>DL for DBT</td><td align="left" valign="top">12,555</td><td align="left" valign="top">8</td><td align="left" valign="top">0.88 (0.80-0.93)</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">0.74 (0.59-0.85)</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">0.89 (0.86-0.92)</td><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Senior radiologists</td><td align="left" valign="top">961</td><td align="left" valign="top">4</td><td align="left" valign="top">0.86 (0.81-0.90)</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">0.93 (0.48-1.00)</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">0.90 (0.87-0.92)</td><td align="left" valign="top"/><td align="left" valign="top"/></tr></tbody></table><table-wrap-foot><fn id="table1fn1"><p><sup>a</sup>On the basis of 2-sided <italic>Z</italic>-test.</p></fn><fn id="table1fn2"><p><sup>b</sup>AUC: area under the receiver operating characteristic curve.</p></fn><fn id="table1fn3"><p><sup>c</sup>Not applicable.</p></fn></table-wrap-foot></table-wrap><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Deep learning (DL) for digital breast tomosynthesis (DBT) vs radiologists of different experience levels in terms of clinical impact outcomes for breast cancer diagnosis.</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Subgroup</td><td align="left" valign="bottom">Sample size, n</td><td align="left" valign="bottom">Validation datasets, n</td><td align="left" valign="bottom">Detection rate (95% CI)</td><td 
align="left" valign="bottom" colspan="2">Difference in detection rate<sup><xref ref-type="table-fn" rid="table2fn1">a</xref></sup></td><td align="left" valign="bottom">PPV<sup><xref ref-type="table-fn" rid="table2fn2">b</xref></sup> (95% CI)</td><td align="left" valign="bottom" colspan="2">Difference in PPV<sup><xref ref-type="table-fn" rid="table2fn1">a</xref></sup></td><td align="left" valign="bottom">Recall rate (95% CI)</td><td align="left" valign="bottom" colspan="2">Difference in recall rate<sup><xref ref-type="table-fn" rid="table2fn1">a</xref></sup></td></tr></thead><tbody><tr><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"><italic>Z</italic>-score</td><td align="left" valign="top"><italic>P</italic> value</td><td align="left" valign="top"/><td align="left" valign="top"><italic>Z</italic>-score</td><td align="left" valign="top"><italic>P</italic> value</td><td align="left" valign="top"/><td align="left" valign="top"><italic>Z</italic>-score</td><td align="left" valign="top"><italic>P</italic> value</td></tr><tr><td align="left" valign="top">Overall DL for DBT</td><td align="left" valign="top">12,555</td><td align="left" valign="top">8</td><td align="left" valign="top">0.14 (0.06-0.29)</td><td align="left" valign="top">&#x2014;<sup><xref ref-type="table-fn" rid="table2fn3">c</xref></sup></td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">0.41 (0.18-0.70)</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">0.39 (0.27-0.53)</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td></tr><tr><td align="left" valign="top">DL for DBT internal validation</td><td align="left" valign="top">5182</td><td align="left" valign="top">1</td><td align="left" valign="top">0.09 (0.08-0.09)</td><td align="left" valign="top">0.87</td><td align="left" 
valign="top">.38</td><td align="left" valign="top">0.13 (0.12-0.15)</td><td align="left" valign="top">2.33</td><td align="left" valign="top">.02</td><td align="left" valign="top">0.63 (0.62-0.65)</td><td align="left" valign="top">4.04</td><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top">DL for DBT external validation</td><td align="left" valign="top">7373</td><td align="left" valign="top">7</td><td align="left" valign="top">0.15 (0.06-0.33)</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">0.47 (0.19-0.76)</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">0.36 (0.24-0.50)</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td></tr><tr><td align="left" valign="top" colspan="4">DL for DBT vs all radiologists</td><td align="left" valign="top">0.18</td><td align="left" valign="top">.91</td><td align="left" valign="top">&#x2003;</td><td align="left" valign="top">1.27</td><td align="left" valign="top">.21</td><td align="left" valign="top">&#x2003;</td><td align="left" valign="top">0.96</td><td align="left" valign="top">.34</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>DL for DBT</td><td align="left" valign="top">12,555</td><td align="left" valign="top">8</td><td align="left" valign="top">0.14 (0.06-0.29)</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">0.41 (0.18-0.70)</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">0.39 (0.27-0.53)</td><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>All radiologists</td><td align="left" valign="top">36,245</td><td 
align="left" valign="top">14</td><td align="left" valign="top">0.15 (0.06-0.30)</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">0.62 (0.40-0.79)</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">0.30 (0.19-0.45)</td><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top" colspan="4">DL for DBT vs junior radiologists</td><td align="left" valign="top">5.07</td><td align="left" valign="top">&#x003E;.99</td><td align="left" valign="top">&#x2003;</td><td align="left" valign="top">3.41</td><td align="left" valign="top">&#x003C;.001</td><td align="left" valign="top">&#x2003;</td><td align="left" valign="top">2.05</td><td align="left" valign="top">.04</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>DL for DBT</td><td align="left" valign="top">12,555</td><td align="left" valign="top">8</td><td align="left" valign="top">0.14 (0.06-0.29)</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">0.41 (0.18-0.70)</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">0.39 (0.27-0.53)</td><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Junior radiologists</td><td align="left" valign="top">310</td><td align="left" valign="top">2</td><td align="left" valign="top">0.47 (0.42-0.53)</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">0.87 (0.81-0.91)</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">0.55 (0.47-0.63)</td><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top" colspan="4">DL for DBT vs senior 
radiologists</td><td align="left" valign="top">1.94</td><td align="left" valign="top">.05</td><td align="left" valign="top">&#x2003;</td><td align="left" valign="top">3.28</td><td align="left" valign="top">.001</td><td align="left" valign="top">&#x2003;</td><td align="left" valign="top">0.25</td><td align="left" valign="top">.81</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>DL for DBT</td><td align="left" valign="top">12,555</td><td align="left" valign="top">8</td><td align="left" valign="top">0.14 (0.06-0.29)</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">0.41 (0.18-0.70)</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">0.39 (0.27-0.53)</td><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Senior radiologists</td><td align="left" valign="top">961</td><td align="left" valign="top">4</td><td align="left" valign="top">0.36 (0.19-0.57)</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">0.90 (0.70-0.97)</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">0.43 (0.22-0.68)</td><td align="left" valign="top"/><td align="left" valign="top"/></tr></tbody></table><table-wrap-foot><fn id="table2fn1"><p><sup>a</sup>On the basis of 2-sided <italic>Z</italic>-test.</p></fn><fn id="table2fn2"><p><sup>b</sup>PPV: positive predictive value.</p></fn><fn id="table2fn3"><p><sup>c</sup>Not applicable.</p></fn></table-wrap-foot></table-wrap><fig position="float" id="figure3"><label>Figure 3.</label><caption><p>Bubble plot and violin plots: (A) bubble plot of temporal trends in area under the receiver operating characteristic curve (AUC) performance of different deep learning (DL) models, (B) 
violin plots of diagnostic outcomes for all radiologists before and after DL assistance, (C) violin plots of diagnostic outcomes for junior radiologists before and after DL assistance, and (D) violin plots of diagnostic outcomes for senior radiologists before and after DL assistance. AI: artificial intelligence; CNN: convolutional neural network; NA: not available; PPV: positive predictive value; ResNet: residual neural network.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="jmir_v28i1e91659_fig03.png"/></fig></sec><sec id="s3-4"><title>All Radiologists</title><p>The sensitivity of all radiologists was found to be 0.83 (95% CI 0.79-0.86; <italic>I</italic><sup>2</sup>=70.78%; low certainty), and the specificity was 0.86 (95% CI 0.75-0.92; <italic>I</italic><sup>2</sup>=99.35%; low certainty). The AUC was 0.88 (95% CI 0.85-0.91; low certainty), whereas the detection rate was 0.15 (95% CI 0.06-0.30; <italic>I</italic><sup>2</sup>=99.4%; moderate certainty). The PPV was 0.62 (95% CI 0.40-0.79; <italic>I</italic><sup>2</sup>=98%; low certainty), and the recall rate was 0.30 (95% CI 0.19-0.45; <italic>I</italic><sup>2</sup>=99.5%; low certainty). As shown in <xref ref-type="table" rid="table1">Tables 1</xref> and <xref ref-type="table" rid="table2">2</xref> and Figures S6 to S10 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>, there were no statistically significant differences between the performance of DL algorithms and that of all radiologists across all metrics.</p></sec><sec id="s3-5"><title>Radiologists With Different Levels of Experience</title><p>DL algorithms for DBT achieved markedly higher sensitivity than junior radiologists (0.88 vs 0.76; <italic>Z</italic>=2.12; <italic>P</italic>=.03). 
However, DL algorithms for DBT exhibited significantly lower PPV than junior radiologists (0.41 vs 0.87; <italic>Z</italic>=3.41; <italic>P</italic>&#x003C;.001) and significantly lower recall rate than junior radiologists (0.39 vs 0.55; <italic>Z</italic>=2.05; <italic>P</italic>=.04). Additionally, DL algorithms for DBT had a significantly lower PPV than senior radiologists (0.41 vs 0.90; <italic>Z</italic>=3.28; <italic>P</italic>=.001). These findings are illustrated in <xref ref-type="table" rid="table1">Tables 1</xref> and <xref ref-type="table" rid="table2">2</xref>.</p></sec><sec id="s3-6"><title>Changes Before and After DL Assistance</title><p>With DL assistance, there were no significant improvements in any of the outcome measures for all radiologists. Similarly, there were no significant improvements in any of the outcome measures for junior or senior radiologists. These findings are shown in Tables S12 and S13 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p></sec><sec id="s3-7"><title>Heterogeneity Testing: Bivariate Box Plots and Meta-Regression</title><p>The bivariate box plots suggested that the studies by Shoshan et al [<xref ref-type="bibr" rid="ref9">9</xref>] and Pinto et al [<xref ref-type="bibr" rid="ref29">29</xref>] might have contributed to the heterogeneity of DL algorithms, whereas those by Bassi et al [<xref ref-type="bibr" rid="ref10">10</xref>] and Resch et al [<xref ref-type="bibr" rid="ref27">27</xref>] might have been sources of heterogeneity among all radiologists, as shown in <xref ref-type="fig" rid="figure4">Figures 4A and 4B</xref> [<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref27">27</xref>,<xref ref-type="bibr" rid="ref29">29</xref>,<xref ref-type="bibr" rid="ref35">35</xref>]. 
Meta-regression indicated that this heterogeneity primarily arose from differences in validation methods (internal validation vs external validation; specificity <italic>P</italic>=.05) and geographic regions (Europe vs North America; sensitivity <italic>P</italic>&#x003C;.001), as shown in Table S14 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p><fig position="float" id="figure4"><label>Figure 4.</label><caption><p>Bivariate box plots and Deeks funnel plots: (A) bivariate box plot for stand-alone deep learning (DL) for digital breast tomosynthesis (DBT) reading, (B) bivariate box plot for radiologist reading, (C) Deeks funnel plot for stand-alone DL for DBT reading, and (D) Deeks funnel plot for radiologist reading [<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref27">27</xref>,<xref ref-type="bibr" rid="ref29">29</xref>,<xref ref-type="bibr" rid="ref35">35</xref>]. ESS: effective sample size.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="jmir_v28i1e91659_fig04.png"/></fig></sec><sec id="s3-8"><title>Sensitivity Analysis</title><p>To assess the robustness of the primary findings, 2 sensitivity analyses were conducted. First, after excluding outliers identified by the bivariate box plot graphical diagnostic method, the analysis yielded a sensitivity of 0.86 (95% CI 0.78&#x2010;0.91), specificity of 0.74 (95% CI 0.62&#x2010;0.83), AUC of 0.88 (95% CI 0.85&#x2010;0.91), detection rate of 0.13 (95% CI 0.05&#x2010;0.33), PPV of 0.38 (95% CI 0.14&#x2010;0.69), and recall of 0.36 (95% CI 0.23&#x2010;0.53). 
Second, after excluding studies assessed as having a high risk of bias using the PROBAST+AI tool in the validation set, the recalculated pooled effect sizes were as follows: sensitivity of 0.86 (95% CI 0.77&#x2010;0.91), specificity of 0.80 (95% CI 0.57&#x2010;0.93), AUC of 0.90 (95% CI 0.87&#x2010;0.92), detection rate of 0.21 (95% CI 0.15&#x2010;0.27), PPV of 0.61 (95% CI 0.37&#x2010;0.81), and recall of 0.40 (95% CI 0.26&#x2010;0.56). These results were consistent with those of the primary analysis, indicating that the overall conclusions regarding diagnostic performance were robust, neither unduly influenced by high-risk studies nor biased by individual outliers. These findings are shown in Table S15 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p></sec><sec id="s3-9"><title>Clinical Application Value and Publication Bias</title><p>The Deeks funnel plot asymmetry test showed no evidence of publication bias (<italic>P</italic>=.98 vs <italic>P</italic>=.09), as illustrated in <xref ref-type="fig" rid="figure4">Figures 4C and 4D</xref> [<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref27">27</xref>,<xref ref-type="bibr" rid="ref29">29</xref>,<xref ref-type="bibr" rid="ref35">35</xref>]. Using the median prevalence from the included studies as the prior probability, the Fagan nomogram for DL algorithms (median prevalence 26%, IQR 13.84%&#x2013;38.95%) indicated a positive posttest probability of 54%, while that for all radiologists (median prevalence 26%, IQR 12.60%&#x2013;55.01%) indicated a positive posttest probability of 67%, as shown in <xref ref-type="fig" rid="figure5">Figure 5</xref>.</p><fig position="float" id="figure5"><label>Figure 5.</label><caption><p>Fagan nomograms illustrating the posttest probability of breast cancer with digital breast tomosynthesis (DBT) reading: (A) stand-alone deep learning (DL) for DBT reading and (B) radiologist reading without DL assistance. 
LR: likelihood ratio.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="jmir_v28i1e91659_fig05.png"/></fig></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Principal Findings</title><p>Our systematic review and meta-analysis demonstrated that DL algorithms using DBT achieved diagnostic proficiency comparable to the aggregate performance of radiologists and senior experts while exhibiting significantly superior sensitivity compared to junior radiologists. The observed parity between DL and senior radiologists suggests that current computational models have attained a level of pattern recognition equivalent to seasoned clinical judgment. However, the fact that DL has not significantly superseded senior radiologists implies that, while algorithms facilitate standardization, they currently lack the nuance required to outperform complex human decision-making [<xref ref-type="bibr" rid="ref36">36</xref>-<xref ref-type="bibr" rid="ref38">38</xref>]. Conversely, the significant performance gap regarding junior practitioners likely stems from the algorithms&#x2019; robust capacity to systematically analyze vast volumes of annotated data, enabling the detection of subtle or occult lesions that may be obscured by the subjectivity or limited pattern recognition characteristic of early-career experience [<xref ref-type="bibr" rid="ref39">39</xref>]. By minimizing the specific oversight errors associated with inexperience [<xref ref-type="bibr" rid="ref40">40</xref>], DL models effectively function as a &#x201C;safety net&#x201D; enhancing diagnostic sensitivity. 
Consequently, these findings support the integration of DL not as a stand-alone replacement for expert review but as a vital adjunctive tool to augment the capabilities of less experienced radiologists and bridge diagnostic disparities in resource-constrained environments.</p><p>Regarding clinical impact metrics, our initial analysis showed that DL algorithms had lower PPV and recall rates than junior radiologists. However, these findings should be interpreted with caution as they likely reflect spectrum bias and the limited number of studies available for the junior subgroup (2/13, 15.4%). Specifically, the junior radiologist data were derived from highly enriched cohorts, with a malignancy prevalence of 70% in the study by Chae et al [<xref ref-type="bibr" rid="ref35">35</xref>] and 57% positive findings in the study by Bassi et al [<xref ref-type="bibr" rid="ref10">10</xref>]. As PPV is a prevalence-dependent metric, the exceptionally high disease burden in these datasets artificially inflated the pooled estimates for the junior subgroup, reflecting fundamental differences in the underlying test populations rather than intrinsic radiologist superiority.</p><p>More importantly, an essential clinical counterpoint to the sensitivity advantage of DL algorithms is the significantly lower pooled PPV of stand-alone DL algorithms (0.41) compared to all radiologists (0.62) and senior radiologists (0.90). A PPV of 0.41 implies that less than half of the cases flagged by the DL algorithms are truly malignant, corresponding to a substantial FP burden. This finding is particularly contradictory as one of the primary, well-documented advantages of DBT over traditional mammography is its ability to reduce unnecessary recall rates. Using low-PPV DL algorithms as a primary or concurrent reading tool could erode this advantage, leading to a cascade of adverse consequences. 
These include unnecessary biopsies, heightened patient psychological distress and anxiety, and the overconsumption of health care resources&#x2014;costs that are thoroughly documented in breast screening literature. Therefore, we urge a cautious approach to claims advocating for current DL algorithms as universal standardization tools. Their integration should be highly context dependent: in settings experiencing a severe shortage of experienced radiologists, the sensitivity benefits may offset the FP costs; however, in fully resourced screening environments with available senior radiologists, the net clinical benefit of stand-alone DL remains uncertain. Before recommending these models as reliable standardization tools, future deployment frameworks must incorporate well-defined, acceptable safety thresholds for FP rates and prospectively evaluate downstream clinical and economic outcomes.</p><p>Interestingly, our analysis comparing radiologist performance with and without DL assistance revealed no statistically significant incremental benefit. This lack of synergistic enhancement likely results from a &#x201C;ceiling effect,&#x201D; where experienced radiologists and high-performing algorithms achieve similar diagnostic performance, leaving limited room for improvement [<xref ref-type="bibr" rid="ref41">41</xref>,<xref ref-type="bibr" rid="ref42">42</xref>]. Furthermore, the clinical utility of DL assistants is constrained by &#x201C;automation bias&#x201D; and the opaque nature of &#x201C;black box&#x201D; algorithms [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref43">43</xref>]. When radiologist confidence in the algorithm is low&#x2014;or when the AI serves merely as a concurrent reader without explainable features&#x2014;the translation of algorithmic output into improved decision-making is diminished [<xref ref-type="bibr" rid="ref44">44</xref>]. 
This suggests that, for DL to provide substantive added value, future systems must move beyond binary classification to provide interpretability and context-aware insights.</p><p>In comparison with a previous meta-analysis published by Yoon et al [<xref ref-type="bibr" rid="ref45">45</xref>] in 2023, which reported that stand-alone AI significantly outperformed radiologists (AUC=0.90 vs 0.79) for breast cancer diagnosis, our study presents more conservative findings (AUC=0.89 vs 0.88; no significant difference). This discrepancy is attributable to the substantial expansion of the evidence base in our review. By incorporating approximately 3 times the number of studies, our analysis mitigates small-study effects that may have overestimated AI superiority in previous reviews. Our analysis suggests that the diagnostic performance of DL algorithms is currently comparable with the aggregate performance of the overall radiologist population in DBT interpretation but not demonstrably superior.</p><p>Beyond prior analyses, we assessed the specific impact of the DL assistant by comparing diagnostic performance without and with the DL assistant, offering critical insights into the human-AI interaction. We also incorporated essential clinical impact metrics such as detection rate, PPV, and recall rate to assess practical utility beyond discrimination accuracy. Furthermore, the methodological rigor was strengthened through the application of the PROBAST+AI tool for quality assessment and the GRADE approach to evaluate the certainty of evidence. These enhancements and stratified analyses collectively supply new, high-quality evidence that clarifies the current application of DL in breast imaging.</p><p>The extreme heterogeneity observed in several pooled metrics (<italic>I</italic><sup>2</sup>&#x003E;98%) warrants careful interpretation. 
Although meta-regression identified the validation strategy (internal vs external) as a significant contributor to heterogeneity in specificity (<italic>P</italic>=.05), this single factor cannot account for the near-complete variance in metrics such as PPV and recall rate. We posit that the residual heterogeneity reflects fundamental differences inherent to the DL algorithms themselves. First, algorithm design thresholds vary considerably across vendors: some systems are deliberately calibrated toward high sensitivity for triage purposes [<xref ref-type="bibr" rid="ref27">27</xref>], whereas others prioritize specificity to minimize FP recalls, resulting in divergent operating points along the ROC curve. Second, training set composition, including differences in malignancy prevalence, patient demographics, geographic regions, mammographic density distribution, and scanner hardware, introduces domain-specific biases that cannot be homogenized through statistical pooling alone [<xref ref-type="bibr" rid="ref28">28</xref>]. Third, variability in FP tolerance thresholds across health care systems and screening programs further compounds interstudy differences [<xref ref-type="bibr" rid="ref9">9</xref>]. Given this landscape, future studies should prioritize standardized reporting of operating thresholds and algorithm configurations to facilitate more meaningful comparisons.</p><p>Additionally, bivariate box plots pinpointed the studies by Shoshan et al [<xref ref-type="bibr" rid="ref9">9</xref>] and Pinto et al [<xref ref-type="bibr" rid="ref29">29</xref>] as distinct outliers. This heterogeneity is mechanistically explainable: Shoshan et al [<xref ref-type="bibr" rid="ref9">9</xref>] optimized their algorithm for a &#x201C;triage&#x201D; workflow, deliberately sacrificing specificity to maximize sensitivity for ruling out normal cases, resulting in a skewed performance profile compared to standard diagnostic models. 
Meanwhile, the divergence in the study by Pinto et al [<xref ref-type="bibr" rid="ref29">29</xref>] is attributable to a small sample size (N=190), which introduces substantial statistical instability and random variation into the results.</p><p>Looking ahead, most of the DL algorithms included in our study were confined to single-modality DBT data, lacking the contextual depth provided by supplementary imaging and clinical history. Future iterations must prioritize multimodal integration to synthesize comprehensive diagnostic insights by correlating findings with the patient&#x2019;s clinical background [<xref ref-type="bibr" rid="ref46">46</xref>]. Implementation is further constrained by technical and systemic barriers, including data scarcity, regulatory challenges, and limited generalizability. While emerging techniques such as few-shot learning and self-supervised models may address these gaps, sustained multidisciplinary efforts are essential to optimize AI safety and deliver comprehensive solutions that genuinely augment radiological practice [<xref ref-type="bibr" rid="ref47">47</xref>]. Currently, the opacity of decision-making processes remains a critical hurdle, highlighting the urgent need for future AI systems to prioritize transparency and interpretability [<xref ref-type="bibr" rid="ref48">48</xref>].</p><p>Some limitations of this meta-analysis should be considered when interpreting the results. First, the preponderance of retrospective designs introduces potential selection bias, necessitating validation through large-scale prospective trials. Second, to distinguish between independent datasets and mitigate the risk of patient overlap, we extracted data exclusively from the highest-performing DL algorithm in each study context. This methodological necessity may introduce an optimism bias, potentially leading to an overestimation of the average algorithmic performance. 
Third, the limited number of studies stratified by radiologist experience (particularly for juniors) constrains the statistical robustness of subgroup analyses. Fourth, because most included studies did not report complete contingency tables, we used the GetData software to redigitize published ROC curves and derived operating points using the Youden index. This approach introduces 2 layers of methodological limitation: (1) the manual redigitization process could introduce subjective measurement error, potentially leading to small but nonnegligible inaccuracies in the extracted sensitivity and specificity values; and (2) more critically, the Youden index&#x2013;derived operating point represents a theoretical optimum rather than the actual clinical threshold used in each study&#x2019;s real-world setting. Clinically deployed algorithms may operate at thresholds deliberately chosen to balance sensitivity and specificity according to institutional recall policies or regulatory requirements. Consequently, our extracted performance values may not faithfully reflect the true clinical performance of these algorithms as implemented, and this methodological limitation should be considered when interpreting the pooled estimates.</p></sec><sec id="s4-2"><title>Conclusions</title><p>In conclusion, DL algorithms for DBT demonstrated strong diagnostic performance. Although initial subgroup analyses indicated potentially higher sensitivity than junior radiologists, these findings are based on limited studies and require extensive validation to confirm their reliability. 
The current lack of a significant incremental benefit in human-AI collaborative workflows suggests that AI implementation should be approached cautiously&#x2014;not as a stand-alone replacement or an automatic performance booster but rather as a supplementary &#x201C;second opinion.&#x201D; Future research must prioritize the development of explainable AI and prospective multimodal studies to better define the true synergistic potential of human-machine collaboration in breast cancer screening.</p></sec></sec></body><back><ack><p>During the preparation of this work, the authors used DeepSeek-V3.2 to improve readability and language quality. After using this tool, the authors reviewed and edited the content as needed and take full responsibility for the content of the publication.</p></ack><notes><sec><title>Funding</title><p>This research was funded by the National Natural Science Foundation of China (grant 8237153504).</p></sec><sec><title>Data Availability</title><p>The datasets generated or analyzed during this study are available from the corresponding author on reasonable request.</p></sec></notes><fn-group><fn fn-type="con"><p>Conceptualization: SL</p><p>Data curation: XP</p><p>Formal analysis: SL, ZW, YM, LW, XP</p><p>Investigation: ZW, YM, LW, XP</p><p>Methodology: ZW, YM, LW, XP</p><p>Software: SL</p><p>Writing&#x2014;original draft: SL</p><p>Writing&#x2014;review and editing: LW, XP</p><p>All authors read and approved the final manuscript.</p></fn><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">AI</term><def><p>artificial intelligence</p></def></def-item><def-item><term id="abb2">AUC</term><def><p>area under the receiver operating characteristic curve</p></def></def-item><def-item><term id="abb3">DBT</term><def><p>digital breast tomosynthesis</p></def></def-item><def-item><term id="abb4">DL</term><def><p>deep learning</p></def></def-item><def-item><term 
id="abb5">FN</term><def><p>false negative</p></def></def-item><def-item><term id="abb6">FP</term><def><p>false positive</p></def></def-item><def-item><term id="abb7">GRADE</term><def><p>Grading of Recommendations Assessment, Development, and Evaluation</p></def></def-item><def-item><term id="abb8">MeSH</term><def><p>Medical Subject Headings</p></def></def-item><def-item><term id="abb9">PPV</term><def><p>positive predictive value</p></def></def-item><def-item><term id="abb10">PRISMA</term><def><p>Preferred Reporting Items for Systematic Reviews and Meta-Analyses</p></def></def-item><def-item><term id="abb11">PRISMA-DTA</term><def><p>Preferred Reporting Items for Systematic Reviews and Meta-Analyses extension for Diagnostic Test Accuracy</p></def></def-item><def-item><term id="abb12">PROBAST+AI</term><def><p>Prediction Model Risk of Bias Assessment Tool+Artificial Intelligence</p></def></def-item><def-item><term id="abb13">ROC</term><def><p>receiver operating characteristic</p></def></def-item><def-item><term id="abb14">TN</term><def><p>true negative</p></def></def-item><def-item><term id="abb15">TP</term><def><p>true positive</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Giaquinto</surname><given-names>AN</given-names> </name><name name-style="western"><surname>Sung</surname><given-names>H</given-names> </name><name name-style="western"><surname>Miller</surname><given-names>KD</given-names> </name><etal/></person-group><article-title>Breast cancer statistics, 2022</article-title><source>CA Cancer J Clin</source><year>2022</year><month>11</month><volume>72</volume><issue>6</issue><fpage>524</fpage><lpage>541</lpage><pub-id pub-id-type="doi">10.3322/caac.21754</pub-id><pub-id pub-id-type="medline">36190501</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation 
citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bray</surname><given-names>F</given-names> </name><name name-style="western"><surname>Laversanne</surname><given-names>M</given-names> </name><name name-style="western"><surname>Sung</surname><given-names>H</given-names> </name><etal/></person-group><article-title>Global cancer statistics 2022: GLOBOCAN estimates of incidence and mortality worldwide for 36 cancers in 185 countries</article-title><source>CA Cancer J Clin</source><year>2024</year><volume>74</volume><issue>3</issue><fpage>229</fpage><lpage>263</lpage><pub-id pub-id-type="doi">10.3322/caac.21834</pub-id><pub-id pub-id-type="medline">38572751</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Chong</surname><given-names>A</given-names> </name><name name-style="western"><surname>Weinstein</surname><given-names>SP</given-names> </name><name name-style="western"><surname>McDonald</surname><given-names>ES</given-names> </name><name name-style="western"><surname>Conant</surname><given-names>EF</given-names> </name></person-group><article-title>Digital breast tomosynthesis: concepts and clinical practice</article-title><source>Radiology</source><year>2019</year><month>07</month><volume>292</volume><issue>1</issue><fpage>1</fpage><lpage>14</lpage><pub-id pub-id-type="doi">10.1148/radiol.2019180760</pub-id><pub-id pub-id-type="medline">31084476</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gao</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Moy</surname><given-names>L</given-names> </name><name name-style="western"><surname>Heller</surname><given-names>SL</given-names> </name></person-group><article-title>Digital breast 
tomosynthesis: update on technology, evidence, and clinical practice</article-title><source>Radiographics</source><year>2021</year><volume>41</volume><issue>2</issue><fpage>321</fpage><lpage>337</lpage><pub-id pub-id-type="doi">10.1148/rg.2021200101</pub-id><pub-id pub-id-type="medline">33544665</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Raichand</surname><given-names>S</given-names> </name><name name-style="western"><surname>Blaya-Novakova</surname><given-names>V</given-names> </name><name name-style="western"><surname>Berber</surname><given-names>S</given-names> </name><name name-style="western"><surname>Livingstone</surname><given-names>A</given-names> </name><name name-style="western"><surname>Noguchi</surname><given-names>N</given-names> </name><name name-style="western"><surname>Houssami</surname><given-names>N</given-names> </name></person-group><article-title>Digital breast tomosynthesis for breast cancer diagnosis in women with dense breasts and additional breast cancer risk factors: a systematic review</article-title><source>Breast</source><year>2024</year><month>10</month><volume>77</volume><fpage>103767</fpage><pub-id pub-id-type="doi">10.1016/j.breast.2024.103767</pub-id><pub-id pub-id-type="medline">38996609</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wang</surname><given-names>R</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>F</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>H</given-names> </name><etal/></person-group><article-title>Deep learning in digital breast tomosynthesis: current status, challenges, and future trends</article-title><source>MedComm 
(2020)</source><year>2025</year><month>06</month><volume>6</volume><issue>6</issue><fpage>e70247</fpage><pub-id pub-id-type="doi">10.1002/mco2.70247</pub-id><pub-id pub-id-type="medline">40491967</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bai</surname><given-names>J</given-names> </name><name name-style="western"><surname>Posner</surname><given-names>R</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>T</given-names> </name><name name-style="western"><surname>Yang</surname><given-names>C</given-names> </name><name name-style="western"><surname>Nabavi</surname><given-names>S</given-names> </name></person-group><article-title>Applying deep learning in digital breast tomosynthesis for automatic breast cancer detection: a review</article-title><source>Med Image Anal</source><year>2021</year><month>07</month><volume>71</volume><fpage>102049</fpage><pub-id pub-id-type="doi">10.1016/j.media.2021.102049</pub-id><pub-id pub-id-type="medline">33901993</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Balleyguier</surname><given-names>C</given-names> </name><name name-style="western"><surname>Arfi-Rouche</surname><given-names>J</given-names> </name><name name-style="western"><surname>Levy</surname><given-names>L</given-names> </name><etal/></person-group><article-title>Improving digital breast tomosynthesis reading time: a pilot multi-reader, multi-case study using concurrent Computer-Aided Detection (CAD)</article-title><source>Eur J Radiol</source><year>2017</year><month>12</month><volume>97</volume><fpage>83</fpage><lpage>89</lpage><pub-id pub-id-type="doi">10.1016/j.ejrad.2017.10.014</pub-id><pub-id pub-id-type="medline">29153373</pub-id></nlm-citation></ref><ref 
id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Shoshan</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Bakalo</surname><given-names>R</given-names> </name><name name-style="western"><surname>Gilboa-Solomon</surname><given-names>F</given-names> </name><etal/></person-group><article-title>Artificial intelligence for reducing workload in breast cancer screening with digital breast tomosynthesis</article-title><source>Radiology</source><year>2022</year><month>04</month><volume>303</volume><issue>1</issue><fpage>69</fpage><lpage>77</lpage><pub-id pub-id-type="doi">10.1148/radiol.211105</pub-id><pub-id pub-id-type="medline">35040677</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bassi</surname><given-names>E</given-names> </name><name name-style="western"><surname>Russo</surname><given-names>A</given-names> </name><name name-style="western"><surname>Oliboni</surname><given-names>E</given-names> </name><etal/></person-group><article-title>The role of an artificial intelligence software in clinical senology: a mammography multi-reader study</article-title><source>Radiol Med</source><year>2024</year><month>02</month><volume>129</volume><issue>2</issue><fpage>202</fpage><lpage>210</lpage><pub-id pub-id-type="doi">10.1007/s11547-023-01751-1</pub-id><pub-id pub-id-type="medline">38082194</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Salameh</surname><given-names>JP</given-names> </name><name name-style="western"><surname>Bossuyt</surname><given-names>PM</given-names> </name><name name-style="western"><surname>McGrath</surname><given-names>TA</given-names> 
</name><etal/></person-group><article-title>Preferred reporting items for systematic review and meta-analysis of diagnostic test accuracy studies (PRISMA-DTA): explanation, elaboration, and checklist</article-title><source>BMJ</source><year>2020</year><month>08</month><day>14</day><volume>370</volume><fpage>m2632</fpage><pub-id pub-id-type="doi">10.1136/bmj.m2632</pub-id><pub-id pub-id-type="medline">32816740</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Moons</surname><given-names>KG</given-names> </name><name name-style="western"><surname>Damen</surname><given-names>JA</given-names> </name><name name-style="western"><surname>Kaul</surname><given-names>T</given-names> </name><etal/></person-group><article-title>PROBAST+AI: an updated quality, risk of bias, and applicability assessment tool for prediction models using regression or artificial intelligence methods</article-title><source>BMJ</source><year>2025</year><month>03</month><day>24</day><volume>388</volume><fpage>e082505</fpage><pub-id pub-id-type="doi">10.1136/bmj-2024-082505</pub-id><pub-id pub-id-type="medline">40127903</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gopalakrishna</surname><given-names>G</given-names> </name><name name-style="western"><surname>Mustafa</surname><given-names>RA</given-names> </name><name name-style="western"><surname>Davenport</surname><given-names>C</given-names> </name><etal/></person-group><article-title>Applying Grading of Recommendations Assessment, Development and Evaluation (GRADE) to diagnostic tests was challenging but doable</article-title><source>J Clin Epidemiol</source><year>2014</year><month>07</month><volume>67</volume><issue>7</issue><fpage>760</fpage><lpage>768</lpage><pub-id 
pub-id-type="doi">10.1016/j.jclinepi.2014.01.006</pub-id><pub-id pub-id-type="medline">24725643</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Arends</surname><given-names>LR</given-names> </name><name name-style="western"><surname>Hamza</surname><given-names>TH</given-names> </name><name name-style="western"><surname>van Houwelingen</surname><given-names>JC</given-names> </name><name name-style="western"><surname>Heijenbrok-Kal</surname><given-names>MH</given-names> </name><name name-style="western"><surname>Hunink</surname><given-names>MG</given-names> </name><name name-style="western"><surname>Stijnen</surname><given-names>T</given-names> </name></person-group><article-title>Bivariate random effects meta-analysis of ROC curves</article-title><source>Med Decis Making</source><year>2008</year><volume>28</volume><issue>5</issue><fpage>621</fpage><lpage>638</lpage><pub-id pub-id-type="doi">10.1177/0272989X08319957</pub-id><pub-id pub-id-type="medline">18591542</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Higgins</surname><given-names>JP</given-names> </name><name name-style="western"><surname>Thompson</surname><given-names>SG</given-names> </name></person-group><article-title>Quantifying heterogeneity in a meta-analysis</article-title><source>Stat Med</source><year>2002</year><month>06</month><day>15</day><volume>21</volume><issue>11</issue><fpage>1539</fpage><lpage>1558</lpage><pub-id pub-id-type="doi">10.1002/sim.1186</pub-id><pub-id pub-id-type="medline">12111919</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Deeks</surname><given-names>JJ</given-names> </name><name 
name-style="western"><surname>Macaskill</surname><given-names>P</given-names> </name><name name-style="western"><surname>Irwig</surname><given-names>L</given-names> </name></person-group><article-title>The performance of tests of publication bias and other sample size effects in systematic reviews of diagnostic test accuracy was assessed</article-title><source>J Clin Epidemiol</source><year>2005</year><month>09</month><volume>58</volume><issue>9</issue><fpage>882</fpage><lpage>893</lpage><pub-id pub-id-type="doi">10.1016/j.jclinepi.2005.01.016</pub-id><pub-id pub-id-type="medline">16085191</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Morrell</surname><given-names>S</given-names> </name><name name-style="western"><surname>Hutel</surname><given-names>M</given-names> </name><name name-style="western"><surname>Lucena</surname><given-names>O</given-names> </name><etal/></person-group><article-title>Dynamic AI-assisted ipsilateral tissue matching for digital breast tomosynthesis</article-title><source>Eur J Radiol</source><year>2025</year><month>11</month><volume>192</volume><fpage>112356</fpage><pub-id pub-id-type="doi">10.1016/j.ejrad.2025.112356</pub-id><pub-id pub-id-type="medline">40925265</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Letter</surname><given-names>H</given-names> </name><name name-style="western"><surname>Peratikos</surname><given-names>M</given-names> </name><name name-style="western"><surname>Toledano</surname><given-names>A</given-names> </name><etal/></person-group><article-title>Use of artificial intelligence for digital breast tomosynthesis screening: a preliminary real-world experience</article-title><source>J Breast 
Imaging</source><year>2023</year><month>05</month><day>22</day><volume>5</volume><issue>3</issue><fpage>258</fpage><lpage>266</lpage><pub-id pub-id-type="doi">10.1093/jbi/wbad015</pub-id><pub-id pub-id-type="medline">38416890</pub-id></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Uematsu</surname><given-names>T</given-names> </name><name name-style="western"><surname>Nakashima</surname><given-names>K</given-names> </name><name name-style="western"><surname>Harada</surname><given-names>TL</given-names> </name><name name-style="western"><surname>Nasu</surname><given-names>H</given-names> </name><name name-style="western"><surname>Igarashi</surname><given-names>T</given-names> </name></person-group><article-title>Artificial intelligence computer-aided detection enhances synthesized mammograms: comparison with original digital mammograms alone and in combination with tomosynthesis images in an experimental setting</article-title><source>Breast Cancer</source><year>2023</year><month>01</month><volume>30</volume><issue>1</issue><fpage>46</fpage><lpage>55</lpage><pub-id pub-id-type="doi">10.1007/s12282-022-01396-4</pub-id><pub-id pub-id-type="medline">36001270</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Dahlblom</surname><given-names>V</given-names> </name><name name-style="western"><surname>Andersson</surname><given-names>I</given-names> </name><name name-style="western"><surname>L&#x00E5;ng</surname><given-names>K</given-names> </name><name name-style="western"><surname>Tingberg</surname><given-names>A</given-names> </name><name name-style="western"><surname>Zackrisson</surname><given-names>S</given-names> </name><name name-style="western"><surname>Dustler</surname><given-names>M</given-names> 
</name></person-group><article-title>Artificial intelligence detection of missed cancers at digital mammography that were detected at digital breast tomosynthesis</article-title><source>Radiol Artif Intell</source><year>2021</year><volume>3</volume><issue>6</issue><fpage>e200299</fpage><pub-id pub-id-type="doi">10.1148/ryai.2021200299</pub-id><pub-id pub-id-type="medline">34870215</pub-id></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Uematsu</surname><given-names>T</given-names> </name><name name-style="western"><surname>Nakashima</surname><given-names>K</given-names> </name><name name-style="western"><surname>Harada</surname><given-names>TL</given-names> </name><name name-style="western"><surname>Nasu</surname><given-names>H</given-names> </name><name name-style="western"><surname>Igarashi</surname><given-names>T</given-names> </name></person-group><article-title>Comparisons between artificial intelligence computer-aided detection synthesized mammograms and digital mammograms when used alone and in combination with tomosynthesis images in a virtual screening setting</article-title><source>Jpn J Radiol</source><year>2023</year><month>01</month><volume>41</volume><issue>1</issue><fpage>63</fpage><lpage>70</lpage><pub-id pub-id-type="doi">10.1007/s11604-022-01327-5</pub-id><pub-id pub-id-type="medline">36068450</pub-id></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Larsen</surname><given-names>M</given-names> </name><name name-style="western"><surname>Aglen</surname><given-names>CF</given-names> </name><name name-style="western"><surname>Lee</surname><given-names>CI</given-names> </name><etal/></person-group><article-title>Artificial intelligence evaluation of 122 969 mammography examinations from a population-based screening 
program</article-title><source>Radiology</source><year>2022</year><month>06</month><volume>303</volume><issue>3</issue><fpage>502</fpage><lpage>511</lpage><pub-id pub-id-type="doi">10.1148/radiol.212381</pub-id><pub-id pub-id-type="medline">35348377</pub-id></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Plimpton</surname><given-names>SR</given-names> </name><name name-style="western"><surname>Milch</surname><given-names>H</given-names> </name><name name-style="western"><surname>Sears</surname><given-names>C</given-names> </name><etal/></person-group><article-title>External validation of a commercial artificial intelligence algorithm on a diverse population for detection of false negative breast cancers</article-title><source>J Breast Imaging</source><year>2025</year><month>01</month><day>25</day><volume>7</volume><issue>1</issue><fpage>16</fpage><lpage>26</lpage><pub-id pub-id-type="doi">10.1093/jbi/wbae058</pub-id><pub-id pub-id-type="medline">39401322</pub-id></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Dahlblom</surname><given-names>V</given-names> </name><name name-style="western"><surname>Dustler</surname><given-names>M</given-names> </name><name name-style="western"><surname>Zackrisson</surname><given-names>S</given-names> </name><name name-style="western"><surname>Tingberg</surname><given-names>A</given-names> </name></person-group><article-title>Workload reduction of digital breast tomosynthesis screening using artificial intelligence and synthetic mammography: a simulation study</article-title><source>J Med Imaging (Bellingham)</source><year>2025</year><month>11</month><volume>12</volume><issue>Suppl 2</issue><fpage>S22005</fpage><pub-id pub-id-type="doi">10.1117/1.JMI.12.S2.S22005</pub-id><pub-id 
pub-id-type="medline">40313361</pub-id></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Berti</surname><given-names>A</given-names> </name><name name-style="western"><surname>Scapicchio</surname><given-names>C</given-names> </name><name name-style="western"><surname>Iacconi</surname><given-names>C</given-names> </name><etal/></person-group><article-title>An explainable-by-design end-to-end AI framework based on prototypical part learning for lesion detection and classification in Digital Breast Tomosynthesis images</article-title><source>Comput Struct Biotechnol J</source><year>2025</year><volume>27</volume><fpage>2649</fpage><lpage>2660</lpage><pub-id pub-id-type="doi">10.1016/j.csbj.2025.06.008</pub-id><pub-id pub-id-type="medline">40599244</pub-id></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>van Winkel</surname><given-names>SL</given-names> </name><name name-style="western"><surname>Rodr&#x00ED;guez-Ruiz</surname><given-names>A</given-names> </name><name name-style="western"><surname>Appelman</surname><given-names>L</given-names> </name><etal/></person-group><article-title>Impact of artificial intelligence support on accuracy and reading time in breast tomosynthesis image interpretation: a multi-reader multi-case study</article-title><source>Eur Radiol</source><year>2021</year><month>11</month><volume>31</volume><issue>11</issue><fpage>8682</fpage><lpage>8691</lpage><pub-id pub-id-type="doi">10.1007/s00330-021-07992-w</pub-id><pub-id pub-id-type="medline">33948701</pub-id></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Resch</surname><given-names>D</given-names> </name><name 
name-style="western"><surname>Lo Gullo</surname><given-names>R</given-names> </name><name name-style="western"><surname>Teuwen</surname><given-names>J</given-names> </name><etal/></person-group><article-title>AI-enhanced mammography with digital breast tomosynthesis for breast cancer detection: clinical value and comparison with human performance</article-title><source>Radiol Imaging Cancer</source><year>2024</year><month>07</month><volume>6</volume><issue>4</issue><fpage>e230149</fpage><pub-id pub-id-type="doi">10.1148/rycan.230149</pub-id><pub-id pub-id-type="medline">38995172</pub-id></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Raya-Povedano</surname><given-names>JL</given-names> </name><name name-style="western"><surname>Romero-Mart&#x00ED;n</surname><given-names>S</given-names> </name><name name-style="western"><surname>El&#x00ED;as-Cabot</surname><given-names>E</given-names> </name><name name-style="western"><surname>Gubern-M&#x00E9;rida</surname><given-names>A</given-names> </name><name name-style="western"><surname>Rodr&#x00ED;guez-Ruiz</surname><given-names>A</given-names> </name><name name-style="western"><surname>&#x00C1;lvarez-Benito</surname><given-names>M</given-names> </name></person-group><article-title>AI-based strategies to reduce workload in breast cancer screening with mammography and tomosynthesis: a retrospective evaluation</article-title><source>Radiology</source><year>2021</year><month>07</month><volume>300</volume><issue>1</issue><fpage>57</fpage><lpage>65</lpage><pub-id pub-id-type="doi">10.1148/radiol.2021203555</pub-id><pub-id pub-id-type="medline">33944627</pub-id></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Pinto</surname><given-names>MC</given-names> </name><name 
name-style="western"><surname>Rodriguez-Ruiz</surname><given-names>A</given-names> </name><name name-style="western"><surname>Pedersen</surname><given-names>K</given-names> </name><etal/></person-group><article-title>Impact of artificial intelligence decision support using deep learning on breast cancer screening interpretation with single-view wide-angle digital breast tomosynthesis</article-title><source>Radiology</source><year>2021</year><month>09</month><volume>300</volume><issue>3</issue><fpage>529</fpage><lpage>536</lpage><pub-id pub-id-type="doi">10.1148/radiol.2021204432</pub-id><pub-id pub-id-type="medline">34227882</pub-id></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Park</surname><given-names>EK</given-names> </name><name name-style="western"><surname>Kwak</surname><given-names>SY</given-names> </name><name name-style="western"><surname>Lee</surname><given-names>W</given-names> </name><name name-style="western"><surname>Choi</surname><given-names>JS</given-names> </name><name name-style="western"><surname>Kooi</surname><given-names>T</given-names> </name><name name-style="western"><surname>Kim</surname><given-names>EK</given-names> </name></person-group><article-title>Impact of AI for digital breast tomosynthesis on breast cancer detection and interpretation time</article-title><source>Radiol Artif Intell</source><year>2024</year><month>05</month><volume>6</volume><issue>3</issue><fpage>e230318</fpage><pub-id pub-id-type="doi">10.1148/ryai.230318</pub-id><pub-id pub-id-type="medline">38568095</pub-id></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kim</surname><given-names>JG</given-names> </name><name name-style="western"><surname>Haslam</surname><given-names>B</given-names> </name><name 
name-style="western"><surname>Diab</surname><given-names>AR</given-names> </name><etal/></person-group><article-title>Impact of a categorical AI system for digital breast tomosynthesis on breast cancer interpretation by both general radiologists and breast imaging specialists</article-title><source>Radiol Artif Intell</source><year>2024</year><month>03</month><volume>6</volume><issue>2</issue><fpage>e230137</fpage><pub-id pub-id-type="doi">10.1148/ryai.230137</pub-id><pub-id pub-id-type="medline">38323914</pub-id></nlm-citation></ref><ref id="ref32"><label>32</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>El&#x00ED;as-Cabot</surname><given-names>E</given-names> </name><name name-style="western"><surname>Romero-Mart&#x00ED;n</surname><given-names>S</given-names> </name><name name-style="western"><surname>Raya-Povedano</surname><given-names>JL</given-names> </name><name name-style="western"><surname>Brehl</surname><given-names>AK</given-names> </name><name name-style="western"><surname>&#x00C1;lvarez-Benito</surname><given-names>M</given-names> </name></person-group><article-title>Impact of real-life use of artificial intelligence as support for human reading in a population-based breast cancer screening program with mammography and tomosynthesis</article-title><source>Eur Radiol</source><year>2024</year><month>06</month><volume>34</volume><issue>6</issue><fpage>3958</fpage><lpage>3966</lpage><pub-id pub-id-type="doi">10.1007/s00330-023-10426-4</pub-id><pub-id pub-id-type="medline">37975920</pub-id></nlm-citation></ref><ref id="ref33"><label>33</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Conant</surname><given-names>EF</given-names> </name><name name-style="western"><surname>Toledano</surname><given-names>AY</given-names> </name><name name-style="western"><surname>Periaswamy</surname><given-names>S</given-names> 
</name><etal/></person-group><article-title>Improving accuracy and efficiency with concurrent use of artificial intelligence for digital breast tomosynthesis</article-title><source>Radiol Artif Intell</source><year>2019</year><month>07</month><day>31</day><volume>1</volume><issue>4</issue><fpage>e180096</fpage><pub-id pub-id-type="doi">10.1148/ryai.2019180096</pub-id><pub-id pub-id-type="medline">32076660</pub-id></nlm-citation></ref><ref id="ref34"><label>34</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Chen</surname><given-names>IE</given-names> </name><name name-style="western"><surname>Joines</surname><given-names>MM</given-names> </name><name name-style="western"><surname>Capiro</surname><given-names>N</given-names> </name><etal/></person-group><article-title>Commercial artificial intelligence versus radiologists: NPV and recall rate in large population-based digital mammography and tomosynthesis screening mammography cohorts</article-title><source>AJR Am J Roentgenol</source><year>2025</year><month>12</month><volume>225</volume><issue>6</issue><fpage>e2532889</fpage><pub-id pub-id-type="doi">10.2214/AJR.25.32889</pub-id><pub-id pub-id-type="medline">40899676</pub-id></nlm-citation></ref><ref id="ref35"><label>35</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Chae</surname><given-names>EY</given-names> </name><name name-style="western"><surname>Kim</surname><given-names>HH</given-names> </name><name name-style="western"><surname>Jeong</surname><given-names>JW</given-names> </name><name name-style="western"><surname>Chae</surname><given-names>SH</given-names> </name><name name-style="western"><surname>Lee</surname><given-names>S</given-names> </name><name name-style="western"><surname>Choi</surname><given-names>YW</given-names> </name></person-group><article-title>Decrease in interpretation time for both novice 
and experienced readers using a concurrent computer-aided detection system for digital breast tomosynthesis</article-title><source>Eur Radiol</source><year>2019</year><month>05</month><volume>29</volume><issue>5</issue><fpage>2518</fpage><lpage>2525</lpage><pub-id pub-id-type="doi">10.1007/s00330-018-5886-0</pub-id><pub-id pub-id-type="medline">30547203</pub-id></nlm-citation></ref><ref id="ref36"><label>36</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Balkenende</surname><given-names>L</given-names> </name><name name-style="western"><surname>Teuwen</surname><given-names>J</given-names> </name><name name-style="western"><surname>Mann</surname><given-names>RM</given-names> </name></person-group><article-title>Application of deep learning in breast cancer imaging</article-title><source>Semin Nucl Med</source><year>2022</year><month>09</month><volume>52</volume><issue>5</issue><fpage>584</fpage><lpage>596</lpage><pub-id pub-id-type="doi">10.1053/j.semnuclmed.2022.02.003</pub-id><pub-id pub-id-type="medline">35339259</pub-id></nlm-citation></ref><ref id="ref37"><label>37</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Abdullah</surname><given-names>KA</given-names> </name><name name-style="western"><surname>Marziali</surname><given-names>S</given-names> </name><name name-style="western"><surname>Nanaa</surname><given-names>M</given-names> </name><name name-style="western"><surname>Escudero S&#x00E1;nchez</surname><given-names>L</given-names> </name><name name-style="western"><surname>Payne</surname><given-names>NR</given-names> </name><name name-style="western"><surname>Gilbert</surname><given-names>FJ</given-names> </name></person-group><article-title>Deep learning-based breast cancer diagnosis in breast MRI: systematic review and meta-analysis</article-title><source>Eur 
Radiol</source><year>2025</year><month>08</month><volume>35</volume><issue>8</issue><fpage>4474</fpage><lpage>4489</lpage><pub-id pub-id-type="doi">10.1007/s00330-025-11406-6</pub-id><pub-id pub-id-type="medline">39907762</pub-id></nlm-citation></ref><ref id="ref38"><label>38</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Witowski</surname><given-names>J</given-names> </name><name name-style="western"><surname>Heacock</surname><given-names>L</given-names> </name><name name-style="western"><surname>Reig</surname><given-names>B</given-names> </name><etal/></person-group><article-title>Improving breast cancer diagnostics with deep learning for MRI</article-title><source>Sci Transl Med</source><year>2022</year><month>09</month><day>28</day><volume>14</volume><issue>664</issue><fpage>eabo4802</fpage><pub-id pub-id-type="doi">10.1126/scitranslmed.abo4802</pub-id><pub-id pub-id-type="medline">36170446</pub-id></nlm-citation></ref><ref id="ref39"><label>39</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gu</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Xu</surname><given-names>W</given-names> </name><name name-style="western"><surname>Lin</surname><given-names>B</given-names> </name><etal/></person-group><article-title>Deep learning based on ultrasound images assists breast lesion diagnosis in China: a multicenter diagnostic study</article-title><source>Insights Imaging</source><year>2022</year><month>07</month><day>28</day><volume>13</volume><issue>1</issue><fpage>124</fpage><pub-id pub-id-type="doi">10.1186/s13244-022-01259-8</pub-id><pub-id pub-id-type="medline">35900608</pub-id></nlm-citation></ref><ref id="ref40"><label>40</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lotter</surname><given-names>W</given-names> 
</name><name name-style="western"><surname>Diab</surname><given-names>AR</given-names> </name><name name-style="western"><surname>Haslam</surname><given-names>B</given-names> </name><etal/></person-group><article-title>Robust breast cancer detection in mammography and digital breast tomosynthesis using an annotation-efficient deep learning approach</article-title><source>Nat Med</source><year>2021</year><month>02</month><volume>27</volume><issue>2</issue><fpage>244</fpage><lpage>249</lpage><pub-id pub-id-type="doi">10.1038/s41591-020-01174-9</pub-id><pub-id pub-id-type="medline">33432172</pub-id></nlm-citation></ref><ref id="ref41"><label>41</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Xue</surname><given-names>P</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>J</given-names> </name><name name-style="western"><surname>Qin</surname><given-names>D</given-names> </name><etal/></person-group><article-title>Deep learning in image-based breast and cervical cancer detection: a systematic review and meta-analysis</article-title><source>NPJ Digit Med</source><year>2022</year><month>02</month><day>15</day><volume>5</volume><issue>1</issue><fpage>19</fpage><pub-id pub-id-type="doi">10.1038/s41746-022-00559-z</pub-id><pub-id pub-id-type="medline">35169217</pub-id></nlm-citation></ref><ref id="ref42"><label>42</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Tan</surname><given-names>H</given-names> </name><name name-style="western"><surname>Wu</surname><given-names>Q</given-names> </name><name name-style="western"><surname>Wu</surname><given-names>Y</given-names> </name><etal/></person-group><article-title>Mammography-based artificial intelligence for breast cancer detection, diagnosis, and BI-RADS categorization using multi-view and multi-level convolutional neural 
networks</article-title><source>Insights Imaging</source><year>2025</year><month>05</month><day>21</day><volume>16</volume><issue>1</issue><fpage>109</fpage><pub-id pub-id-type="doi">10.1186/s13244-025-01983-x</pub-id><pub-id pub-id-type="medline">40397242</pub-id></nlm-citation></ref><ref id="ref43"><label>43</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Galati</surname><given-names>F</given-names> </name><name name-style="western"><surname>Maroncelli</surname><given-names>R</given-names> </name><name name-style="western"><surname>De Nardo</surname><given-names>C</given-names> </name><etal/></person-group><article-title>Deep learning with transfer learning on digital breast tomosynthesis: a radiomics-based model for predicting breast cancer risk</article-title><source>Diagnostics (Basel)</source><year>2025</year><month>06</month><day>26</day><volume>15</volume><issue>13</issue><fpage>1631</fpage><pub-id pub-id-type="doi">10.3390/diagnostics15131631</pub-id><pub-id pub-id-type="medline">40647630</pub-id></nlm-citation></ref><ref id="ref44"><label>44</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Dan</surname><given-names>Q</given-names> </name><name name-style="western"><surname>Xu</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Burrows</surname><given-names>H</given-names> </name><name name-style="western"><surname>Bissram</surname><given-names>J</given-names> </name><name name-style="western"><surname>Stringer</surname><given-names>JS</given-names> </name><name name-style="western"><surname>Li</surname><given-names>Y</given-names> </name></person-group><article-title>Diagnostic performance of deep learning in ultrasound diagnosis of breast cancer: a systematic review</article-title><source>NPJ Precis 
Oncol</source><year>2024</year><month>01</month><day>27</day><volume>8</volume><fpage>21</fpage><pub-id pub-id-type="doi">10.1038/s41698-024-00514-z</pub-id></nlm-citation></ref><ref id="ref45"><label>45</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Yoon</surname><given-names>JH</given-names> </name><name name-style="western"><surname>Strand</surname><given-names>F</given-names> </name><name name-style="western"><surname>Baltzer</surname><given-names>PAT</given-names> </name><etal/></person-group><article-title>Standalone AI for breast cancer detection at screening digital mammography and digital breast tomosynthesis: a systematic review and meta-analysis</article-title><source>Radiology</source><year>2023</year><month>06</month><volume>307</volume><issue>5</issue><fpage>e222639</fpage><pub-id pub-id-type="doi">10.1148/radiol.222639</pub-id><pub-id pub-id-type="medline">37219445</pub-id></nlm-citation></ref><ref id="ref46"><label>46</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Rao</surname><given-names>VM</given-names> </name><name name-style="western"><surname>Hla</surname><given-names>M</given-names> </name><name name-style="western"><surname>Moor</surname><given-names>M</given-names> </name><etal/></person-group><article-title>Multimodal generative AI for medical image interpretation</article-title><source>Nature</source><year>2025</year><month>03</month><volume>639</volume><issue>8056</issue><fpage>888</fpage><lpage>896</lpage><pub-id pub-id-type="doi">10.1038/s41586-025-08675-y</pub-id><pub-id pub-id-type="medline">40140592</pub-id></nlm-citation></ref><ref id="ref47"><label>47</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sosna</surname><given-names>J</given-names> </name><name 
name-style="western"><surname>Joskowicz</surname><given-names>L</given-names> </name><name name-style="western"><surname>Saban</surname><given-names>M</given-names> </name></person-group><article-title>Navigating the AI landscape in medical imaging: a critical analysis of technologies, implementation, and implications</article-title><source>Radiology</source><year>2025</year><month>06</month><volume>315</volume><issue>3</issue><fpage>e240982</fpage><pub-id pub-id-type="doi">10.1148/radiol.240982</pub-id><pub-id pub-id-type="medline">40552997</pub-id></nlm-citation></ref><ref id="ref48"><label>48</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Champendal</surname><given-names>M</given-names> </name><name name-style="western"><surname>M&#x00FC;ller</surname><given-names>H</given-names> </name><name name-style="western"><surname>Prior</surname><given-names>JO</given-names> </name><name name-style="western"><surname>Dos Reis</surname><given-names>CS</given-names> </name></person-group><article-title>A scoping review of interpretability and explainability concerning artificial intelligence methods in medical imaging</article-title><source>Eur J Radiol</source><year>2023</year><month>12</month><volume>169</volume><fpage>111159</fpage><pub-id pub-id-type="doi">10.1016/j.ejrad.2023.111159</pub-id><pub-id pub-id-type="medline">37976760</pub-id></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>Supplementary tables and figures for the systematic review and meta-analysis.</p><media xlink:href="jmir_v28i1e91659_app1.docx" xlink:title="DOCX File, 3202 KB"/></supplementary-material><supplementary-material id="app2"><label>Checklist 1</label><p>PRISMA checklist.</p><media xlink:href="jmir_v28i1e91659_app2.docx" xlink:title="DOCX File, 20 KB"/></supplementary-material></app-group></back></article>