<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="review-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id><journal-id journal-id-type="publisher-id">jmir</journal-id><journal-id journal-id-type="index">1</journal-id><journal-title>Journal of Medical Internet Research</journal-title><abbrev-journal-title>J Med Internet Res</abbrev-journal-title><issn pub-type="epub">1438-8871</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v28i1e97174</article-id><article-id pub-id-type="doi">10.2196/97174</article-id><article-categories><subj-group subj-group-type="heading"><subject>Review</subject></subj-group></article-categories><title-group><article-title>Performance of Deep Learning in Classifying Age-Related Macular Degeneration From Images: Systematic Review and Meta-Analysis</article-title></title-group><contrib-group><contrib contrib-type="author"><name name-style="western"><surname>Zhu</surname><given-names>Yu</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Niu</surname><given-names>Yue</given-names></name><degrees>BS</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Sun</surname><given-names>Shangye</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Liu</surname><given-names>Wei</given-names></name><degrees>BS</degrees><xref ref-type="aff" 
rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Dou</surname><given-names>Ying</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Guo</surname><given-names>Yu</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff4">4</xref></contrib></contrib-group><aff id="aff1"><institution>Department of Ophthalmology, Jilin Province FAW General Hospital</institution><addr-line>Changchun</addr-line><country>China</country></aff><aff id="aff2"><institution>Department of Human Resources, Jilin Province FAW General Hospital</institution><addr-line>Changchun</addr-line><country>China</country></aff><aff id="aff3"><institution>Department of CT, Jilin Province FAW General Hospital</institution><addr-line>Changchun</addr-line><country>China</country></aff><aff id="aff4"><institution>Department of Otolaryngology, Jilin Province FAW General Hospital</institution><addr-line>2643 Dongfeng Street</addr-line><addr-line>Changchun, Jilin Province</addr-line><country>China</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Brini</surname><given-names>Stefano</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Tsai</surname><given-names>Meng-Hsun</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Liang</surname><given-names>Xiaolong</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Yu Guo, MD, Department of Otolaryngology, Jilin Province FAW General Hospital, 2643 Dongfeng Street, Changchun, Jilin Province, 130011, China, 86 15948784509; <email>ninanguoyu@163.com</email></corresp></author-notes><pub-date pub-type="collection"><year>2026</year></pub-date><pub-date 
pub-type="epub"><day>15</day><month>6</month><year>2026</year></pub-date><volume>28</volume><elocation-id>e97174</elocation-id><history><date date-type="received"><day>04</day><month>04</month><year>2026</year></date><date date-type="rev-recd"><day>11</day><month>05</month><year>2026</year></date><date date-type="accepted"><day>12</day><month>05</month><year>2026</year></date></history><copyright-statement>&#x00A9; Yu Zhu, Yue Niu, Shangye Sun, Wei Liu, Ying Dou, Yu Guo. Originally published in the Journal of Medical Internet Research (<ext-link ext-link-type="uri" xlink:href="https://www.jmir.org">https://www.jmir.org</ext-link>), 15.6.2026. </copyright-statement><copyright-year>2026</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research (ISSN 1438-8871), is properly cited. The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://www.jmir.org/">https://www.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://www.jmir.org/2026/1/e97174"/><abstract><sec><title>Background</title><p>Age-related macular degeneration (AMD) is a leading cause of irreversible blindness worldwide. Retinal imaging and deep learning (DL) may support scalable screening, but deployment requires evidence on pooled performance. 
This is important because missed neovascular disease may delay treatment, whereas excessive false positives may overload referral pathways.</p></sec><sec><title>Objective</title><p>This study aimed to compare the diagnostic performance of DL algorithms with ophthalmologists for detecting AMD and differentiating wet AMD (wAMD) from dry AMD (dAMD) and to identify factors that influence DL performance.</p></sec><sec sec-type="methods"><title>Methods</title><p>PubMed, Embase, Web of Science, and the Cochrane Library were searched through October 5, 2025, and updated on April 19, 2026. Eligible studies applied DL to classify AMD from normal retinas or wAMD from dAMD using retinal images. Two reviewers independently extracted data and assessed risk of bias using the Prediction model Risk Of Bias Assessment Tool for Artificial Intelligence (PROBAST+AI) tool. Pooled sensitivity, specificity, accuracy, and area under the curve were estimated using bivariate random-effects models. Clinician comparisons were stratified by experience (junior vs senior). Small-study effects were assessed via Deeks&#x2019; funnel plot asymmetry test. Evidence certainty was appraised using the Grading of Recommendations, Assessment, Development, and Evaluation framework. The protocol was registered in the International Prospective Register of Systematic Reviews (PROSPERO; CRD420251243276).</p></sec><sec sec-type="results"><title>Results</title><p>Overall, 28 studies were included, comprising 77,485 samples for AMD detection and 28,705 samples for wAMD versus dAMD classification. For AMD detection, DL achieved a pooled sensitivity of 0.98 (95% CI 0.96&#x2010;0.99; prediction interval [PI] 0.95&#x2010;0.99), specificity of 0.98 (95% CI 0.95&#x2010;0.99; PI 0.95&#x2010;0.99), accuracy of 0.97 (95% CI 0.96&#x2010;0.99), and area under the curve of 1.00 (95% CI 0.99&#x2010;1.00). 
For wAMD versus dAMD, DL showed sensitivity of 0.95 (95% CI 0.91&#x2010;0.97; PI 0.89&#x2010;0.97), specificity of 0.95 (95% CI 0.93&#x2010;0.97; PI 0.92&#x2010;0.97), accuracy of 0.95 (95% CI 0.92&#x2010;0.97), and area under the curve of 0.99 (95% CI 0.97&#x2010;0.99). DL showed higher sensitivity than senior ophthalmologists for AMD (0.98 vs 0.75; <italic>P</italic>&#x003C;.001) and higher specificity and accuracy than junior ophthalmologists for wAMD classification. Optical coherence tomography&#x2013;based models performed more consistently than color fundus photography or multimodal models. Evidence certainty was moderate.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>Compared with ophthalmologists, DL algorithms demonstrated superior and more balanced diagnostic performance in the available head-to-head evidence, potentially providing a consistent decision-support baseline that mitigates human threshold-dependent trade-offs. However, high heterogeneity, wide PIs, predominantly retrospective designs, and possible performance inflation from internal validation mean that these relative performance findings remain preliminary rather than deployment ready. DL should be viewed as a triage adjunct requiring local calibration, not an autonomous diagnostic replacement. 
Prospective, multicenter, patient-level external validation with prespecified human comparison arms is required.</p></sec><sec><title>Trial Registration</title><p>PROSPERO CRD420251243276; https://www.crd.york.ac.uk/PROSPERO/view/CRD420251243276</p></sec></abstract><kwd-group><kwd>age-related macular degeneration</kwd><kwd>deep learning</kwd><kwd>artificial intelligence</kwd><kwd>optical coherence tomography</kwd><kwd>meta-analysis</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>Age-related macular degeneration (AMD) remains a leading cause of irreversible blindness in older individuals globally [<xref ref-type="bibr" rid="ref1">1</xref>]. Clinically, the disease is classified into dry AMD (dAMD), characterized by the progressive accumulation of drusen and geographic atrophy, and wet AMD (wAMD), which involves rapid vision loss due to macular neovascularization [<xref ref-type="bibr" rid="ref2">2</xref>]. As the global population ages, the prevalence of AMD is projected to rise significantly; recent estimates indicate that the number of individuals with AMD-related vision impairment will increase from 8.06 million in 2021 to approximately 21.34 million by 2050 [<xref ref-type="bibr" rid="ref1">1</xref>]. Consequently, early and accurate diagnosis is paramount. Timely detection allows for appropriate intervention, which is critical for slowing disease progression, preserving visual function, and improving overall patient prognosis.</p><p>Conventionally, color fundus photography (CFP) and optical coherence tomography (OCT) serve as the cornerstones for AMD screening and diagnosis. However, reliance on these modalities presents distinct challenges. 
CFP is frequently limited by image quality; issues such as media opacities or small pupils can render images ungradable, with rates as high as 47.6% in some screening contexts, and CFP often lacks sensitivity for detecting subtle early-stage structural changes or neovascular activity [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref4">4</xref>]. Conversely, while OCT offers high gradability (up to 97.7%) and detailed cross-sectional visualization, it is constrained by a limited field of view and reduced efficacy in identifying pigmentary abnormalities compared to CFP [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref4">4</xref>]. Beyond these technical constraints, the manual interpretation of vast imaging datasets is inherently labor-intensive, subjective, and prone to interobserver variability, creating a scalability bottleneck for population-wide screening.</p><p>In response to these challenges, deep learning (DL) algorithms using OCT, CFP, or multimodal imaging have emerged as a transformative approach, offering the potential for automated, high-throughput classification [<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref6">6</xref>]. While these algorithms demonstrate theoretical superiority in efficiency and feature extraction, the current literature reveals substantial heterogeneity in performance outcomes [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref8">8</xref>]. Discrepancies regarding model generalization to real-world settings and the comparative performance of DL algorithms against ophthalmologists remain unresolved [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref8">8</xref>]. Two pivotal questions persist: First, how does the diagnostic performance of DL models quantitatively compare against ophthalmologists of varying expertise? 
Second, what factors, such as imaging modality, type of validation, database source, study centers, and unit of analysis, influence DL performance? Existing literature offers fragmented and sometimes contradictory insights, lacking a comprehensive quantitative synthesis.</p><p>Several previous meta-analyses have evaluated DL performance in AMD diagnosis. Leng et al [<xref ref-type="bibr" rid="ref9">9</xref>] reported a pooled sensitivity of 94% and specificity of 97% for convolutional neural network&#x2013;based algorithms, while Chen et al [<xref ref-type="bibr" rid="ref10">10</xref>] highlighted the overall superiority of artificial intelligence (AI) over retinal specialists. However, these prior reviews have notable limitations: they did not stratify human-AI comparisons by clinician experience level, used conventional bias assessment tools rather than the recently developed Prediction model Risk Of Bias Assessment Tool for Artificial Intelligence (PROBAST+AI) instrument [<xref ref-type="bibr" rid="ref11">11</xref>], and did not separately evaluate the clinically critical task of differentiating wAMD from dAMD. Moreover, the rapid advancement of DL architectures, particularly vision transformers, necessitates an updated quantitative synthesis incorporating the latest evidence.</p><p>Importantly, our review was designed to address several evidence gaps that were not fully covered in previous meta-analyses. First, instead of evaluating DL algorithms in isolation, we directly compared DL performance with ophthalmologists and further stratified these comparisons by clinician experience level. This is clinically relevant because screening and referral decisions are often made by clinicians with different levels of expertise. Second, we separately evaluated the classification of wAMD versus dAMD, a task with immediate therapeutic implications because delayed recognition of wAMD may postpone anti&#x2013;vascular endothelial growth factor treatment. 
Third, we incorporated PROBAST+AI, a recently developed tool tailored to prediction models using AI, thereby providing a more AI-specific assessment of bias than conventional quality appraisal tools [<xref ref-type="bibr" rid="ref12">12</xref>]. Fourth, we examined prediction intervals (PIs), validation strategy, imaging modality, and other sources of heterogeneity to move beyond average pooled performance and assess the likely robustness of DL algorithms across clinical settings. These features make the current review not only an update of the evidence base, but also a more deployment-oriented synthesis of the clinical value and limitations of DL for AMD image classification.</p><p>Therefore, this systematic review and meta-analysis addressed these clinically relevant and deployment-oriented evidence gaps. Its objective was to evaluate the diagnostic performance of DL algorithms compared with ophthalmologists of varying experience levels for detecting AMD and differentiating its subtypes (wAMD vs dAMD), and to assess potential factors influencing DL diagnostic performance through subgroup analyses and meta-regressions.</p></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Overview</title><p>This systematic review and meta-analysis was conducted in strict accordance with the PRISMA-DTA (Preferred Reporting Items for Systematic Reviews and Meta-Analyses of Diagnostic Test Accuracy) guidelines [<xref ref-type="bibr" rid="ref13">13</xref>], with the specific reporting items detailed in Table S1 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>. The abstract was reported in accordance with the PRISMA (Preferred Reporting Items for Systematic Reviews and Meta-Analyses) 2020 for abstracts checklist, as shown in Table S2 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>. 
This systematic review and meta-analysis adhered to the preregistered protocol (PROSPERO [International Prospective Register of Systematic Reviews] CRD420251243276). At the request of reviewers, PIs for sensitivity and specificity were calculated as an additional analysis to provide estimates of expected performance in new clinical settings [<xref ref-type="bibr" rid="ref13">13</xref>].</p></sec><sec id="s2-2"><title>Search Strategy</title><p>A comprehensive literature search was conducted across PubMed, Embase, Web of Science, and the Cochrane Library databases, with coverage extending through April 19, 2026. The initial search was conducted through October 5, 2025, and subsequently updated on April 19, 2026, to capture any recently published studies. Two independent reviewers (YZ and YN) performed the preliminary screening of titles and abstracts, followed by a full-text assessment. The search strategy used a combination of free-text terms and Medical Subject Headings focusing on four distinct domains: AMD-related terminologies (eg, &#x201C;Macular Degeneration&#x201D;), DL concepts (eg, &#x201C;Artificial Intelligence&#x201D; and &#x201C;Deep Learning&#x201D;), imaging modalities (eg, &#x201C;Optical Coherence Tomography&#x201D; and &#x201C;fundus photograph&#x201D;), and diagnostic performance metrics (eg, &#x201C;sensitivity&#x201D; and &#x201C;specificity&#x201D;). No restrictions regarding language or publication year were applied during the initial retrieval. To ensure exhaustiveness, reference lists of included studies and relevant meta-analyses were manually scrutinized for additional literature. 
Detailed search queries were provided in Table S3 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p></sec><sec id="s2-3"><title>Eligibility Criteria</title><p>The study inclusion adhered to the Patient, Index test, Target condition, Reference standard, Outcome, and Setting (PITROS) framework, which is detailed in <xref ref-type="table" rid="table1">Table 1</xref>. To ensure the analysis focused on diagnostic test accuracy, several exclusion criteria were systematically applied. We excluded publications with clearly irrelevant titles and abstracts, and specific noneligible study types, including reviews, cross-sectional surveys, case reports, conference abstracts, meta-analyses, letters, and studies with unavailable full texts. Furthermore, studies were excluded if their primary aim was not the classification of AMD versus normal or wAMD versus dAMD, or if they lacked sufficient data to extract or calculate a 2&#x00D7;2 contingency table (true positives [TPs], false positives [FPs], false negatives [FNs], and true negatives [TNs]). The screening was performed independently by 2 reviewers (YZ and SYS). 
Any discrepancies were resolved through discussion or, if necessary, by consultation with a third senior reviewer (YG) to reach a final consensus.</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Summary of inclusion criteria using the PITROS<sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup> framework.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Criteria</td><td align="left" valign="bottom">Details</td></tr></thead><tbody><tr><td align="left" valign="top">Participants (P)</td><td align="left" valign="top">Adults undergoing retinal imaging (CFP<sup><xref ref-type="table-fn" rid="table1fn2">b</xref></sup> or OCT<sup><xref ref-type="table-fn" rid="table1fn3">c</xref></sup>) with confirmed ocular status (normal, dAMD<sup><xref ref-type="table-fn" rid="table1fn4">d</xref></sup>, or wAMD<sup><xref ref-type="table-fn" rid="table1fn5">e</xref></sup>) based on clinical diagnosis or standard imaging protocols.</td></tr><tr><td align="left" valign="top">Index test (I)</td><td align="left" valign="top">DL<sup><xref ref-type="table-fn" rid="table1fn6">f</xref></sup> algorithms using retinal images (CFP, OCT, or both) for automated diagnosis or classification.</td></tr><tr><td align="left" valign="top">Target conditions (T)</td><td align="left" valign="top">The study addressed two classification tasks: first, AMD<sup><xref ref-type="table-fn" rid="table1fn7">g</xref></sup> versus normal, comparing confirmed AMD cases (positive) against healthy controls (negative); and second, wAMD versus dAMD, distinguishing exudative or neovascular AMD (positive) from nonexudative or atrophic AMD (negative).</td></tr><tr><td align="left" valign="top">Reference standard (R)</td><td align="left" valign="top">Clinical diagnosis by ophthalmologists based on multimodal imaging (eg, CFP, OCT, and fluorescein angiography) and/or longitudinal follow-up.</td></tr><tr><td align="left" valign="top">Outcomes 
(O)</td><td align="left" valign="top">Diagnostic performance metrics, including sensitivity, specificity, accuracy, and AUC<sup><xref ref-type="table-fn" rid="table1fn8">h</xref></sup>. Data extraction focused on contingency tables (TP<sup><xref ref-type="table-fn" rid="table1fn9">i</xref></sup>, FP<sup><xref ref-type="table-fn" rid="table1fn10">j</xref></sup>, FN<sup><xref ref-type="table-fn" rid="table1fn11">k</xref></sup>, and TN<sup><xref ref-type="table-fn" rid="table1fn12">l</xref></sup>).</td></tr><tr><td align="left" valign="top">Settings (S)</td><td align="left" valign="top">Retrospective or prospective studies using single-center, multicenter clinical datasets, or public databases (eg, AREDS<sup><xref ref-type="table-fn" rid="table1fn13">m</xref></sup> and Kaggle).</td></tr></tbody></table><table-wrap-foot><fn id="table1fn1"><p><sup>a</sup>PITROS: Patient, Index test, Target condition, Reference standard, Outcome, and Setting.</p></fn><fn id="table1fn2"><p><sup>b</sup>CFP: color fundus photography.</p></fn><fn id="table1fn3"><p><sup>c</sup>OCT: optical coherence tomography.</p></fn><fn id="table1fn4"><p><sup>d</sup>dAMD: dry age-related macular degeneration.</p></fn><fn id="table1fn5"><p><sup>e</sup>wAMD: wet age-related macular degeneration.</p></fn><fn id="table1fn6"><p><sup>f</sup>DL: deep learning.</p></fn><fn id="table1fn7"><p><sup>g</sup>AMD: age-related macular degeneration.</p></fn><fn id="table1fn8"><p><sup>h</sup>AUC: area under the curve.</p></fn><fn id="table1fn9"><p><sup>i</sup>TP: true positive.</p></fn><fn id="table1fn10"><p><sup>j</sup>FP: false positive.</p></fn><fn id="table1fn11"><p><sup>k</sup>FN: false negative.</p></fn><fn id="table1fn12"><p><sup>l</sup>TN: true negative.</p></fn><fn id="table1fn13"><p><sup>m</sup>AREDS: Age-Related Eye Disease Study.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s2-4"><title>Quality Assessment and Certainty of Evidence</title><p>The methodological quality and risk of bias of the included 
studies were assessed using the PROBAST+AI tool [<xref ref-type="bibr" rid="ref11">11</xref>], an updated version replacing PROBAST 2019. This tool evaluates two distinct phases, model development and model evaluation, across four domains each, encompassing participants, predictors, outcomes, and analysis. Each domain is judged as having a low, high, or unclear risk of bias based on a series of tailored signaling questions. These questions are rated as &#x201C;yes,&#x201D; &#x201C;probably yes,&#x201D; &#x201C;probably no,&#x201D; &#x201C;no,&#x201D; &#x201C;no information,&#x201D; or &#x201C;not applicable.&#x201D; The complete set of signaling questions and detailed ratings were provided in Tables S4 and S5 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>. To ensure objectivity and accuracy, 2 reviewers (YZ and WL) independently performed this assessment for all included studies.</p><p>To appraise the certainty of the evidence for the pooled sensitivity, specificity, and diagnostic accuracy, we used the Grading of Recommendations, Assessment, Development, and Evaluations (GRADE) framework for diagnostic studies. This approach focuses on five key domains: risk of bias, indirectness, inconsistency, imprecision, and small-study effects [<xref ref-type="bibr" rid="ref13">13</xref>]. The GRADE summary of findings table was formatted according to the diagnostic test accuracy template recommended by the GRADE working group and used a pretest probability of 20% for expected results per 1000 tested. The full GRADE assessment criteria and the final judgments for each outcome were detailed in Table S6 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p></sec><sec id="s2-5"><title>Data Extraction</title><p>Two reviewers (YZ and YD) independently performed data extraction from the full-text articles, and disagreements were resolved by discussion with a third reviewer (YG). 
Extracted information included study design, patient or sample size, imaging modality, data source, validation design, reference standard, target condition, AI architecture, diagnostic contingency data or reconstructed diagnostic data, ophthalmologist experience level when available, and risk-of-bias and certainty judgments. The data extraction tables are included in the paper.</p><p>As most studies did not report the full binary diagnostic contingency table (2&#x00D7;2 table), we used an indirect derivation approach. Specifically, TP, FP, FN, and TN values were extracted by merging categories from multiclass confusion matrices (three- or four-class tables) reported in the included studies. In a few cases where such matrices were unavailable, the values were indirectly calculated using reported sensitivity, specificity, AMD sample size, and total sample size. When studies provided multiple, nonoverlapping validation sets, the corresponding contingency tables were assumed independent, and all were extracted. However, when multiple DL algorithms were presented within a single study, only the major model defined by the authors was extracted to avoid patient overlap in data pooling. This approach preserved the independence of data in our meta-analyses and avoided double counting by ensuring that the same patient sample was not counted more than once.</p></sec><sec id="s2-6"><title>Outcome Measures</title><p>The primary outcome measures were pooled sensitivity, specificity, accuracy, and area under the curve (AUC). Sensitivity and specificity describe threshold-dependent diagnostic performance, accuracy summarizes the proportion of correctly classified samples in the analyzed dataset and may be affected by class balance, and AUC reflects threshold-independent discrimination across possible decision thresholds. 
These metrics were therefore interpreted as complementary rather than interchangeable indicators of model behavior.</p></sec><sec id="s2-7"><title>Statistical Analysis</title><p>Considering the inherent heterogeneity anticipated among studies, a bivariate random-effects model was used to pool the sensitivity, specificity, and AUC values for both DL algorithms and ophthalmologists [<xref ref-type="bibr" rid="ref14">14</xref>]. For the diagnostic accuracy metric, a generalized linear mixed model with a random-effects framework was used following a logit transformation. A 2-sample Z-test was used to compare the differences in pooled sensitivity, specificity, diagnostic accuracy, and AUC, with statistical significance defined as a <italic>P</italic> value &#x003C;.05. It is important to note that this bivariate model uses restricted maximum likelihood estimation, which differs fundamentally from the DerSimonian-Laird approach used in standard pairwise meta-analyses. While the Hartung-Knapp-Sidik-Jonkman adjustment is recommended for DerSimonian-Laird&#x2013;based analyses to reduce false positives [<xref ref-type="bibr" rid="ref15">15</xref>], it is not directly applicable to the bivariate diagnostic framework, as the restricted maximum likelihood&#x2013;based bivariate model already provides more accurate variance estimation that inherently guards against inflated type I error rates. To characterize the distribution of true effects across different populations and settings, 95% PIs were calculated where a sufficient number of studies (&#x2265;3) were available, complementing the confidence intervals for the pooled average effects [<xref ref-type="bibr" rid="ref16">16</xref>]. 
While confidence intervals quantify the precision of the average effect, PIs estimate the range within which the true diagnostic performance of a future study is expected to fall.</p><p>For DL algorithm outcomes demonstrating substantial heterogeneity, a bivariate boxplot and multivariable meta-regression were performed to explore potential sources. As prespecified, subgroup analyses were conducted based on imaging modality (OCT vs CFP vs multimodal), with between-subgroup differences compared and visualized using violin plots. The potential clinical impact of the DL algorithms was assessed using a Fagan nomogram. Small-study effects were evaluated using Deeks&#x2019; funnel plot asymmetry test, with a <italic>P</italic> value &#x003C;.10 indicating potential asymmetry [<xref ref-type="bibr" rid="ref17">17</xref>]. It should be noted that funnel plot asymmetry can arise from multiple sources beyond small-study effects, including differences in study quality, true heterogeneity, and chance [<xref ref-type="bibr" rid="ref17">17</xref>,<xref ref-type="bibr" rid="ref18">18</xref>]. All statistical analyses were performed using Stata 15.1 (StataCorp LLC, with the <italic>midas</italic> and <italic>metadta</italic> packages) and R (version 4.5.1; R Core Team, using the <italic>ggplot2</italic> and <italic>tidyverse</italic> packages). All statistical tests were 2-tailed.</p></sec><sec id="s2-8"><title>Use of Large Language Models</title><p>During the preparation of this work, we used OpenAI Codex (GPT-5) to assist with text generation, proofreading and editing, summarizing text, formulation of conclusions, translation, and reformatting under full human supervision. The tool was not used to make eligibility decisions, extract data, perform statistical analyses, or draw independent scientific conclusions. 
After using this tool, we reviewed and edited the content as needed and took full responsibility for the content of the publication.</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title>Study Selection</title><p>The initial database search identified 3586 potentially relevant records. After removing duplicates, 1980 unique records underwent title and abstract screening. During this phase, 1861 records were excluded due to obvious irrelevance or ineligible publication types (eg, reviews and conference abstracts). Subsequently, 119 full-text articles were assessed for eligibility. Following a detailed review, 61 studies were excluded as they did not primarily focus on distinguishing AMD from normal retinas or discriminating wAMD from dAMD. One cross-sectional study and 32 studies lacking sufficient or complete diagnostic data (TP, FP, FN, and TN) were further excluded. An additional 3 articles identified from other nondatabase sources (eg, reference lists) were also included [<xref ref-type="bibr" rid="ref19">19</xref>-<xref ref-type="bibr" rid="ref21">21</xref>]. Consequently, 28 studies [<xref ref-type="bibr" rid="ref19">19</xref>-<xref ref-type="bibr" rid="ref46">46</xref>] met all predefined inclusion criteria and were included in the meta-analysis. The study selection process followed the PRISMA guidelines, as shown in <xref ref-type="fig" rid="figure1">Figure 1</xref>.</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>PRISMA (Preferred Reporting Items for Systematic Reviews and Meta-Analyses) flow diagram illustrating the study selection process for the systematic review and meta-analysis. 
AMD: age-related macular degeneration; dAMD: dry age-related macular degeneration; FN: false negative; FP: false positive; TN: true negative; TP: true positive; wAMD: wet age-related macular degeneration.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="jmir_v28i1e97174_fig01.png"/></fig></sec><sec id="s3-2"><title>Study Characteristics</title><p>A total of 27 studies [<xref ref-type="bibr" rid="ref19">19</xref>-<xref ref-type="bibr" rid="ref36">36</xref>,<xref ref-type="bibr" rid="ref38">38</xref>-<xref ref-type="bibr" rid="ref46">46</xref>], comprising a validation set of 77,485 samples, targeted the classification of AMD versus normal. Among these, diagnostic performance data were available for junior ophthalmologists in 1 study [<xref ref-type="bibr" rid="ref24">24</xref>], and for senior ophthalmologists in 3 studies [<xref ref-type="bibr" rid="ref20">20</xref>,<xref ref-type="bibr" rid="ref24">24</xref>,<xref ref-type="bibr" rid="ref36">36</xref>]. 
Ten studies were based on OCT imaging [<xref ref-type="bibr" rid="ref23">23</xref>,<xref ref-type="bibr" rid="ref25">25</xref>-<xref ref-type="bibr" rid="ref28">28</xref>,<xref ref-type="bibr" rid="ref32">32</xref>,<xref ref-type="bibr" rid="ref34">34</xref>,<xref ref-type="bibr" rid="ref35">35</xref>,<xref ref-type="bibr" rid="ref40">40</xref>,<xref ref-type="bibr" rid="ref44">44</xref>], 12 on CFP [<xref ref-type="bibr" rid="ref19">19</xref>-<xref ref-type="bibr" rid="ref21">21</xref>,<xref ref-type="bibr" rid="ref24">24</xref>,<xref ref-type="bibr" rid="ref29">29</xref>-<xref ref-type="bibr" rid="ref31">31</xref>,<xref ref-type="bibr" rid="ref33">33</xref>,<xref ref-type="bibr" rid="ref36">36</xref>,<xref ref-type="bibr" rid="ref38">38</xref>,<xref ref-type="bibr" rid="ref39">39</xref>,<xref ref-type="bibr" rid="ref41">41</xref>], and 4 on multimodal (OCT+CFP) inputs [<xref ref-type="bibr" rid="ref42">42</xref>,<xref ref-type="bibr" rid="ref43">43</xref>,<xref ref-type="bibr" rid="ref45">45</xref>,<xref ref-type="bibr" rid="ref46">46</xref>]. Detailed study, patient, and technical characteristics were presented in <xref ref-type="table" rid="table2">Table 2</xref> and Tables S7-S8 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p><p>For the classification of wAMD versus dAMD, 16 studies (validation set: 28,705 samples) were included [<xref ref-type="bibr" rid="ref19">19</xref>,<xref ref-type="bibr" rid="ref23">23</xref>-<xref ref-type="bibr" rid="ref29">29</xref>,<xref ref-type="bibr" rid="ref31">31</xref>,<xref ref-type="bibr" rid="ref33">33</xref>,<xref ref-type="bibr" rid="ref37">37</xref>,<xref ref-type="bibr" rid="ref41">41</xref>-<xref ref-type="bibr" rid="ref44">44</xref>,<xref ref-type="bibr" rid="ref46">46</xref>]. 
Data for head-to-head comparisons with ophthalmologists were available from 2 studies each for junior and senior practitioners [<xref ref-type="bibr" rid="ref19">19</xref>,<xref ref-type="bibr" rid="ref24">24</xref>], enabling stratified analysis by experience level. Six studies used OCT [<xref ref-type="bibr" rid="ref23">23</xref>,<xref ref-type="bibr" rid="ref25">25</xref>-<xref ref-type="bibr" rid="ref28">28</xref>,<xref ref-type="bibr" rid="ref44">44</xref>], 7 used CFP [<xref ref-type="bibr" rid="ref19">19</xref>,<xref ref-type="bibr" rid="ref24">24</xref>,<xref ref-type="bibr" rid="ref29">29</xref>,<xref ref-type="bibr" rid="ref31">31</xref>,<xref ref-type="bibr" rid="ref33">33</xref>,<xref ref-type="bibr" rid="ref37">37</xref>,<xref ref-type="bibr" rid="ref41">41</xref>], and 3 used multimodal imaging [<xref ref-type="bibr" rid="ref42">42</xref>,<xref ref-type="bibr" rid="ref43">43</xref>,<xref ref-type="bibr" rid="ref46">46</xref>]. Corresponding detailed characteristics were provided in <xref ref-type="table" rid="table2">Table 2</xref> and Tables S9-S10 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>. 
The complete diagnostic data for ophthalmologists were presented in Table S11 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Study and patient characteristics of the included studies.</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Author</td><td align="left" valign="bottom">Year</td><td align="left" valign="bottom">Country</td><td align="left" valign="bottom">Study design</td><td align="left" valign="bottom">Analysis</td><td align="left" valign="bottom">Reference standard</td><td align="left" valign="bottom">Target condition</td><td align="left" valign="bottom" colspan="3">Number of total sample size</td><td align="left" valign="bottom">Number of positive sample size</td></tr><tr><td align="left" valign="bottom"/><td align="left" valign="bottom"/><td align="left" valign="bottom"/><td align="left" valign="bottom"/><td align="left" valign="bottom"/><td align="left" valign="bottom"/><td align="left" valign="bottom"/><td align="left" valign="bottom">Training</td><td align="left" valign="bottom">IV<sup><xref ref-type="table-fn" rid="table2fn1">a</xref></sup></td><td align="left" valign="bottom">EV<sup><xref ref-type="table-fn" rid="table2fn2">b</xref></sup></td><td align="left" valign="bottom"/></tr></thead><tbody><tr><td align="left" valign="top">Abdelhalim et al [<xref ref-type="bibr" rid="ref22">22</xref>]</td><td align="left" valign="top">2025</td><td align="left" valign="top">Multiple countries</td><td align="left" valign="top">Retrospective</td><td align="left" valign="top">IB<sup><xref ref-type="table-fn" rid="table2fn3">c</xref></sup></td><td align="left" valign="top">&#x2014;<sup><xref ref-type="table-fn" rid="table2fn4">d</xref></sup></td><td align="left" valign="top">AMD<sup><xref ref-type="table-fn" rid="table2fn5">e</xref></sup> versus normal</td><td align="left" valign="top"><list 
list-type="bullet"><list-item><p>1011</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>127</p></list-item></list></td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Training: 589</p></list-item><list-item><p>IV: 74</p></list-item></list></td></tr><tr><td align="left" valign="top">Bao et al [<xref ref-type="bibr" rid="ref24">24</xref>]</td><td align="char" char="." valign="top">2025</td><td align="left" valign="top">China</td><td align="left" valign="top">Retrospective</td><td align="left" valign="top">IB</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top"/><td align="char" char="." valign="top"><list list-type="bullet"><list-item><p>894</p></list-item></list></td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">AMD versus normal</td><td align="left" valign="top"/><td align="left" valign="top"><list list-type="bullet"><list-item><p>IV1: 223</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>EV1: 1395</p></list-item><list-item><p>EV2: 59</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Training: 406</p></list-item><list-item><p>IV1: 101</p></list-item><list-item><p>EV1: 194</p></list-item><list-item><p>EV2: 40</p></list-item></list></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">wAMD<sup><xref ref-type="table-fn" rid="table2fn6">f</xref></sup> versus dAMD<sup><xref ref-type="table-fn" 
rid="table2fn7">g</xref></sup></td><td align="left" valign="top"/><td align="left" valign="top"><list list-type="bullet"><list-item><p>IV1: 92</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>EV1: 136</p></list-item><list-item><p>EV2: 40</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Training: 267</p></list-item><list-item><p>IV1: 60</p></list-item><list-item><p>EV1: 36</p></list-item><list-item><p>EV2: 21</p></list-item></list></td></tr><tr><td align="left" valign="top">Durmaz Engin et al [<xref ref-type="bibr" rid="ref28">28</xref>]</td><td align="left" valign="top">2025</td><td align="left" valign="top">NA</td><td align="left" valign="top">Retrospective</td><td align="left" valign="top">IB</td><td align="left" valign="top">Expert consensus</td><td align="left" valign="top">AMD versus normal</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>1200</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>300</p></list-item></list></td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Training: 800</p></list-item><list-item><p>IV: 200</p></list-item></list></td></tr><tr><td align="left" valign="top">Zhen et al [<xref ref-type="bibr" rid="ref46">46</xref>]</td><td align="char" char="." 
valign="top">2025</td><td align="left" valign="top">China</td><td align="left" valign="top">Retrospective</td><td align="left" valign="top">PB<sup><xref ref-type="table-fn" rid="table2fn8">h</xref></sup></td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top"/><td align="left" valign="top"><list list-type="bullet"><list-item><p>Training 1: 664</p></list-item></list></td><td align="left" valign="top"/><td align="left" valign="top">&#x2014;</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">AMD versus normal</td><td align="left" valign="top"/><td align="left" valign="top"><list list-type="bullet"><list-item><p>134</p></list-item></list></td><td align="left" valign="top"/><td align="left" valign="top"><list list-type="bullet"><list-item><p>Training 1: 509</p></list-item><list-item><p>IV: 101</p></list-item></list></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">wAMD versus dAMD</td><td align="left" valign="top"/><td align="left" valign="top"><list list-type="bullet"><list-item><p>101</p></list-item></list></td><td align="left" valign="top"/><td align="left" valign="top"><list list-type="bullet"><list-item><p>Training: 386</p></list-item><list-item><p>IV: 75</p></list-item></list></td></tr><tr><td align="left" valign="top">Alenezi et al [<xref ref-type="bibr" rid="ref23">23</xref>]</td><td align="char" char="." valign="top">2024</td><td align="left" valign="top">NA</td><td align="left" valign="top">Retrospective</td><td align="left" valign="top">IB</td><td align="left" valign="top">Expert consensus</td><td align="left" valign="top"/><td align="char" char="." 
valign="top"><list list-type="bullet"><list-item><p>554</p></list-item></list></td><td align="left" valign="top"/><td align="left" valign="top">&#x2014;</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">AMD versus normal</td><td align="left" valign="top"/><td align="left" valign="top"><list list-type="bullet"><list-item><p>554</p></list-item></list></td><td align="left" valign="top"/><td align="left" valign="top"><list list-type="bullet"><list-item><p>Training: 367</p></list-item><list-item><p>IV: 367</p></list-item></list></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">wAMD versus dAMD</td><td align="left" valign="top"/><td align="left" valign="top"><list list-type="bullet"><list-item><p>361</p></list-item></list></td><td align="left" valign="top"/><td align="left" valign="top"><list list-type="bullet"><list-item><p>Training: 173</p></list-item><list-item><p>IV: 173</p></list-item></list></td></tr><tr><td align="left" valign="top">Garc&#x00ED;a-Floriano et al [<xref ref-type="bibr" rid="ref30">30</xref>]</td><td align="left" valign="top">2024</td><td align="left" valign="top">Multiple countries</td><td align="left" valign="top">Retrospective</td><td align="left" valign="top">IB</td><td align="left" valign="top">Expert consensus</td><td align="left" valign="top">AMD versus normal</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>250</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>250</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>22</p></list-item></list></td><td 
align="left" valign="top"><list list-type="bullet"><list-item><p>Training: 128</p></list-item><list-item><p>IV: 128</p></list-item><list-item><p>EV: 11</p></list-item></list></td></tr><tr><td align="left" valign="top">Le et al [<xref ref-type="bibr" rid="ref33">33</xref>]</td><td align="char" char="." valign="top">2024</td><td align="left" valign="top">Multiple countries</td><td align="left" valign="top">Retrospective</td><td align="left" valign="top">IB</td><td align="left" valign="top">Expert consensus</td><td align="left" valign="top"/><td align="char" char="." valign="top"><list list-type="bullet"><list-item><p>2359</p></list-item></list></td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">AMD versus normal</td><td align="left" valign="top"/><td align="left" valign="top"><list list-type="bullet"><list-item><p>2359</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>750</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Training: 1144</p></list-item><list-item><p>IV: 1144</p></list-item><list-item><p>EV: 500</p></list-item></list></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">wAMD versus dAMD</td><td align="left" valign="top"/><td align="left" valign="top"><list list-type="bullet"><list-item><p>1095</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>440</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Training: 
599</p></list-item><list-item><p>IV: 588</p></list-item><list-item><p>EV: 239</p></list-item></list></td></tr><tr><td align="left" valign="top">Oliveira et al [<xref ref-type="bibr" rid="ref36">36</xref>]</td><td align="left" valign="top">2024</td><td align="left" valign="top">Multiple countries</td><td align="left" valign="top">Retrospective</td><td align="left" valign="top">IB</td><td align="left" valign="top">Expert consensus</td><td align="left" valign="top">AMD versus normal</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>6896</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>210</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>80</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Training: 275</p></list-item><list-item><p>IV: 105</p></list-item><list-item><p>EV: 40</p></list-item></list></td></tr><tr><td align="left" valign="top">Wan et al [<xref ref-type="bibr" rid="ref41">41</xref>]</td><td align="char" char="." valign="top">2024</td><td align="left" valign="top">China</td><td align="left" valign="top">Retrospective</td><td align="left" valign="top">IB</td><td align="left" valign="top">Clinical classification manifestations and expert consensus</td><td align="left" valign="top"/><td align="char" char="." 
valign="top"><list list-type="bullet"><list-item><p>516</p></list-item></list></td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">AMD versus normal</td><td align="left" valign="top"/><td align="left" valign="top"><list list-type="bullet"><list-item><p>129</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>100</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Training: 312</p></list-item><list-item><p>IV: 78</p></list-item><list-item><p>EV: 60</p></list-item></list></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">wAMD versus dAMD</td><td align="left" valign="top"/><td align="left" valign="top"><list list-type="bullet"><list-item><p>78</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>60</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Training: 214</p></list-item><list-item><p>IV: 53</p></list-item><list-item><p>EV: 41</p></list-item></list></td></tr><tr><td align="left" valign="top">Yusufoglu et al [<xref ref-type="bibr" rid="ref44">44</xref>]</td><td align="char" char="." 
valign="top">2024</td><td align="left" valign="top">Turkey</td><td align="left" valign="top">Retrospective</td><td align="left" valign="top">IB</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">&#x2014;</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">AMD versus normal</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Training 1: 1622</p></list-item><list-item><p>Training 2: 2240</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>IV1: 347</p></list-item><list-item><p>IV2: 491</p></list-item></list></td><td align="left" valign="top"/><td align="left" valign="top"><list list-type="bullet"><list-item><p>Training 1: NA</p></list-item><list-item><p>IV1: 214</p></list-item><list-item><p>Training 2: NA</p></list-item><list-item><p>IV2: 248</p></list-item></list></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">wAMD versus dAMD</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>1622</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>213</p></list-item></list></td><td align="left" valign="top"/><td align="left" valign="top"><list list-type="bullet"><list-item><p>Training 1: NA</p></list-item><list-item><p>IV1: 110</p></list-item></list></td></tr><tr><td align="left" valign="top">Celebi et al [<xref ref-type="bibr" rid="ref26">26</xref>]</td><td align="char" char="." 
valign="top">2023</td><td align="left" valign="top">Turkey</td><td align="left" valign="top">Retrospective</td><td align="left" valign="top">IB</td><td align="left" valign="top">Expert consensus</td><td align="left" valign="top"/><td align="left" valign="top"><list list-type="bullet"><list-item><p>Training 1: 4067</p></list-item><list-item><p>Training 2: 59,139</p></list-item></list></td><td align="left" valign="top"/><td align="left" valign="top">&#x2014;</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">AMD versus normal</td><td align="left" valign="top"/><td align="left" valign="top"><list list-type="bullet"><list-item><p>IV1: 1741</p></list-item><list-item><p>IV2: 25,345</p></list-item></list></td><td align="left" valign="top"/><td align="left" valign="top"><list list-type="bullet"><list-item><p>Training 1: 2577</p></list-item><list-item><p>IV1: 1103</p></list-item><list-item><p>Training 2: 40,544</p></list-item><list-item><p>IV2: 17,375</p></list-item></list></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">wAMD versus dAMD</td><td align="left" valign="top"/><td align="left" valign="top"><list list-type="bullet"><list-item><p>IV1: 1085</p></list-item><list-item><p>IV2: 17,369</p></list-item></list></td><td align="left" valign="top"/><td align="left" valign="top"><list list-type="bullet"><list-item><p>Training 1: 812</p></list-item><list-item><p>IV1: 345</p></list-item><list-item><p>Training 2: 26,219</p></list-item><list-item><p>IV2: 11,234</p></list-item></list></td></tr><tr><td align="left" valign="top">El-Den et al [<xref ref-type="bibr" rid="ref29">29</xref>]</td><td 
align="char" char="." valign="top">2023</td><td align="left" valign="top">Multiple countries</td><td align="left" valign="top">Retrospective</td><td align="left" valign="top">IB</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top"/><td align="char" char="." valign="top"><list list-type="bullet"><list-item><p>605</p></list-item></list></td><td align="left" valign="top"/><td align="left" valign="top">&#x2014;</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">AMD versus normal</td><td align="left" valign="top"/><td align="left" valign="top"><list list-type="bullet"><list-item><p>128</p></list-item></list></td><td align="left" valign="top"/><td align="left" valign="top"><list list-type="bullet"><list-item><p>Training: 454</p></list-item><list-item><p>IV: 98</p></list-item></list></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">wAMD versus dAMD</td><td align="left" valign="top"/><td align="left" valign="top"><list list-type="bullet"><list-item><p>97</p></list-item></list></td><td align="left" valign="top"/><td align="left" valign="top"><list list-type="bullet"><list-item><p>Training: 151</p></list-item><list-item><p>IV: 32</p></list-item></list></td></tr><tr><td align="left" valign="top">Leingang et al [<xref ref-type="bibr" rid="ref35">35</xref>]</td><td align="left" valign="top">2023</td><td align="left" valign="top">Multiple countries</td><td align="left" valign="top">Retrospective</td><td align="left" valign="top">PB</td><td align="left" valign="top">Expert consensus</td><td align="left" valign="top">AMD versus normal</td><td align="left" valign="top"><list 
list-type="bullet"><list-item><p>1733</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>96</p></list-item></list></td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Training: 1620</p></list-item><list-item><p>IV: 90</p></list-item></list></td></tr><tr><td align="left" valign="top">Chen et al [<xref ref-type="bibr" rid="ref27">27</xref>]</td><td align="char" char="." valign="top">2022</td><td align="left" valign="top">China</td><td align="left" valign="top">Retrospective</td><td align="left" valign="top">EB<sup><xref ref-type="table-fn" rid="table2fn9">i</xref></sup></td><td align="left" valign="top">Expert consensus</td><td align="left" valign="top"/><td align="char" char="." valign="top"><list list-type="bullet"><list-item><p>612</p></list-item></list></td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">AMD versus normal</td><td align="left" valign="top"/><td align="left" valign="top"><list list-type="bullet"><list-item><p>153</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>214</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Training: 356</p></list-item><list-item><p>IV: 89</p></list-item><list-item><p>EV: 162</p></list-item></list></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">wAMD versus dAMD</td><td align="left" valign="top"/><td align="left" valign="top"><list 
list-type="bullet"><list-item><p>87</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>143</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Training: 189</p></list-item><list-item><p>IV: 47</p></list-item><list-item><p>EV: 96</p></list-item></list></td></tr><tr><td align="left" valign="top">He et al [<xref ref-type="bibr" rid="ref32">32</xref>]</td><td align="left" valign="top">2022</td><td align="left" valign="top">China</td><td align="left" valign="top">Retrospective</td><td align="left" valign="top">IB<sup><xref ref-type="table-fn" rid="table2fn3">c</xref></sup></td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">AMD versus normal</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>77,568</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>750</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>2130</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Training: 36,656</p></list-item><list-item><p>IV: 500</p></list-item><list-item><p>EV: 723</p></list-item></list></td></tr><tr><td align="left" valign="top">Skevas et al [<xref ref-type="bibr" rid="ref21">21</xref>]</td><td align="left" valign="top">2022</td><td align="left" valign="top">Germany</td><td align="left" valign="top">Prospective</td><td align="left" valign="top">PB<sup><xref ref-type="table-fn" rid="table2fn8">h</xref></sup></td><td align="left" valign="top">Expert consensus</td><td align="left" valign="top">AMD versus normal</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>598</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>EV: 69</p></list-item></list></td></tr><tr><td align="left" valign="top">Wang et al [<xref ref-type="bibr" 
rid="ref42">42</xref>]</td><td align="char" char="." valign="top">2022</td><td align="left" valign="top">China</td><td align="left" valign="top">Retrospective</td><td align="left" valign="top">EB</td><td align="left" valign="top">Expert consensus</td><td align="left" valign="top"/><td align="left" valign="top">&#x2014;</td><td align="left" valign="top"/><td align="left" valign="top">&#x2014;</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">AMD versus normal</td><td align="left" valign="top"/><td align="left" valign="top"><list list-type="bullet"><list-item><p>91</p></list-item></list></td><td align="left" valign="top"/><td align="left" valign="top"><list list-type="bullet"><list-item><p>Training: NA</p></list-item><list-item><p>IV: 71</p></list-item></list></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">wAMD versus dAMD</td><td align="left" valign="top"/><td align="left" valign="top"><list list-type="bullet"><list-item><p>71</p></list-item></list></td><td align="left" valign="top"/><td align="left" valign="top"><list list-type="bullet"><list-item><p>Training: NA</p></list-item><list-item><p>IV: 33</p></list-item></list></td></tr><tr><td align="left" valign="top">Tak et al [<xref ref-type="bibr" rid="ref37">37</xref>]</td><td align="left" valign="top">2021</td><td align="left" valign="top">United States</td><td align="left" valign="top">Retrospective</td><td align="left" valign="top">IB</td><td align="left" valign="top">Expert consensus</td><td align="left" valign="top">wAMD versus dAMD</td><td align="left" valign="top"><list 
list-type="bullet"><list-item><p>350</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>72</p></list-item></list></td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Training: NA</p></list-item><list-item><p>IV: 28</p></list-item></list></td></tr><tr><td align="left" valign="top">Takhchidi et al [<xref ref-type="bibr" rid="ref38">38</xref>]</td><td align="left" valign="top">2021</td><td align="left" valign="top">Russia</td><td align="left" valign="top">Retrospective</td><td align="left" valign="top">IB</td><td align="left" valign="top">Expert consensus</td><td align="left" valign="top">AMD versus normal</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>994</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>206</p></list-item></list></td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Training: 475</p></list-item><list-item><p>IV: 100</p></list-item></list></td></tr><tr><td align="left" valign="top">Thomas et al [<xref ref-type="bibr" rid="ref40">40</xref>]</td><td align="left" valign="top">2021</td><td align="left" valign="top">India</td><td align="left" valign="top">Retrospective</td><td align="left" valign="top">IB</td><td align="left" valign="top">Expert consensus</td><td align="left" valign="top">AMD versus normal</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>87,264</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>750</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>30</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Training: 41,238</p></list-item><list-item><p>IV: 500</p></list-item><list-item><p>EV: 15</p></list-item></list></td></tr><tr><td align="left" 
valign="top">Heo et al [<xref ref-type="bibr" rid="ref19">19</xref>]</td><td align="char" char="." valign="top">2020</td><td align="left" valign="top">Korea</td><td align="left" valign="top">Retrospective</td><td align="left" valign="top">IB</td><td align="left" valign="top">Expert consensus</td><td align="left" valign="top"/><td align="char" char="." valign="top"><list list-type="bullet"><list-item><p>279</p></list-item></list></td><td align="char" char="." valign="top"><list list-type="bullet"><list-item><p>279</p></list-item></list></td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">AMD versus normal</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"><list list-type="bullet"><list-item><p>Training: 191</p></list-item><list-item><p>IV: 191</p></list-item></list></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">wAMD versus dAMD</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"><list list-type="bullet"><list-item><p>Training: 99</p></list-item><list-item><p>IV: 99</p></list-item></list></td></tr><tr><td align="left" valign="top">Zapata et al [<xref ref-type="bibr" rid="ref45">45</xref>]</td><td align="left" valign="top">2020</td><td align="left" valign="top">Multicountries</td><td align="left" valign="top">Retrospective</td><td align="left" valign="top">IB</td><td align="left" valign="top">Expert consensus</td><td align="left" valign="top">AMD versus normal</td><td align="left" 
valign="top"><list list-type="bullet"><list-item><p>7949</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>2208</p></list-item></list></td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Training: NA</p></list-item><list-item><p>IV: 1082</p></list-item></list></td></tr><tr><td align="left" valign="top">Bhatia et al [<xref ref-type="bibr" rid="ref25">25</xref>]</td><td align="char" char="." valign="top">2019</td><td align="left" valign="top">Multiple countries</td><td align="left" valign="top">Retrospective</td><td align="left" valign="top">EB</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top"/><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">AMD versus normal</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"><list list-type="bullet"><list-item><p>EV1: 98</p></list-item><list-item><p>EV2: 75</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>EV1: 48</p></list-item><list-item><p>EV2: 50</p></list-item></list></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">wAMD versus dAMD</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"><list list-type="bullet"><list-item><p>EV2: 50</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>EV2: 
25</p></list-item></list></td></tr><tr><td align="left" valign="top">Matsuba et al [<xref ref-type="bibr" rid="ref20">20</xref>]</td><td align="left" valign="top">2019</td><td align="left" valign="top">Japan</td><td align="left" valign="top">Retrospective</td><td align="left" valign="top">IB</td><td align="left" valign="top">Expert consensus</td><td align="left" valign="top">AMD versus normal</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>5000</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>111</p></list-item></list></td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Training: 870</p></list-item><list-item><p>IV: 42</p></list-item></list></td></tr><tr><td align="left" valign="top">Yoo et al [<xref ref-type="bibr" rid="ref43">43</xref>]</td><td align="char" char="." valign="top">2019</td><td align="left" valign="top">Korea</td><td align="left" valign="top">Retrospective</td><td align="left" valign="top">IB</td><td align="left" valign="top">Pathological examination</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">AMD versus normal</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Training 1: 2100</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>IV1: 900</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>EV2: 83</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Training 1: 1400</p></list-item><list-item><p>IV1: 
600</p></list-item><list-item><p>EV2: 48</p></list-item></list></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">wAMD versus dAMD</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>EV2:48</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>EV2: 36</p></list-item></list></td></tr><tr><td align="left" valign="top">Grassmann et al [<xref ref-type="bibr" rid="ref31">31</xref>]</td><td align="char" char="." valign="top">2018</td><td align="left" valign="top">Multiple countries</td><td align="left" valign="top">Prospective</td><td align="left" valign="top">IB</td><td align="left" valign="top">Expert consensus</td><td align="left" valign="top"/><td align="char" char="." 
valign="top"><list list-type="bullet"><list-item><p>83,653</p></list-item></list></td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">AMD versus normal</td><td align="left" valign="top"/><td align="left" valign="top">11,618</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>1677</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Training: 53,375</p></list-item><list-item><p>IV: 7571</p></list-item><list-item><p>EV: 220</p></list-item></list></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">wAMD versus dAMD</td><td align="left" valign="top"/><td align="left" valign="top"><list list-type="bullet"><list-item><p>6631</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>123</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Training: 9357</p></list-item><list-item><p>IV: 1432</p></list-item><list-item><p>EV: 4</p></list-item></list></td></tr><tr><td align="left" valign="top">Tan et al [<xref ref-type="bibr" rid="ref39">39</xref>]</td><td align="left" valign="top">2018</td><td align="left" valign="top">Multicountries</td><td align="left" valign="top">Retrospective</td><td align="left" valign="top">IB</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">AMD versus normal</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>1110</p></list-item></list></td><td align="left" valign="top"><list 
list-type="bullet"><list-item><p>1100</p></list-item></list></td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Training: 708</p></list-item><list-item><p>IV: 700</p></list-item></list></td></tr><tr><td align="left" valign="top">Lee et al [<xref ref-type="bibr" rid="ref34">34</xref>]</td><td align="left" valign="top">2017</td><td align="left" valign="top">United States</td><td align="left" valign="top">Retrospective</td><td align="left" valign="top">IB</td><td align="left" valign="top">Expert consensus</td><td align="left" valign="top">AMD versus normal</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>80,839</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>20,163</p></list-item></list></td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Training: 41,074</p></list-item><list-item><p>IV: 11,616</p></list-item></list></td></tr></tbody></table><table-wrap-foot><fn id="table2fn1"><p><sup>a</sup>IV: internal validation.</p></fn><fn id="table2fn2"><p><sup>b</sup>EV: external validation.</p></fn><fn id="table2fn3"><p><sup>c</sup>IB: image-based.</p></fn><fn id="table2fn4"><p><sup>d</sup>NA: not available.</p></fn><fn id="table2fn5"><p><sup>e</sup>AMD: age-related macular degeneration.</p></fn><fn id="table2fn6"><p><sup>f</sup>wAMD: wet age-related macular degeneration.</p></fn><fn id="table2fn7"><p><sup>g</sup>dAMD: dry age-related macular degeneration.</p></fn><fn id="table2fn8"><p><sup>h</sup>PB: patient-based.</p></fn><fn id="table2fn9"><p><sup>i</sup>EB: eye-based.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s3-3"><title>Quality Assessment and GRADE Certainty</title><p>The risk of bias and applicability concerns, as assessed by the PROBAST+AI tool, were summarized in <xref ref-type="fig" rid="figure2">Figure 2</xref> and Tables S4-S5 in <xref 
ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>. For the model development phase, 14% (4/28) of studies were judged to have a high overall risk of bias regarding quality [<xref ref-type="bibr" rid="ref23">23</xref>,<xref ref-type="bibr" rid="ref25">25</xref>,<xref ref-type="bibr" rid="ref28">28</xref>,<xref ref-type="bibr" rid="ref30">30</xref>], while none (0/28) raised high applicability concerns. For the model validation or testing phase, 25% (7/28) of studies were rated as having a high overall risk of bias [<xref ref-type="bibr" rid="ref23">23</xref>,<xref ref-type="bibr" rid="ref25">25</xref>,<xref ref-type="bibr" rid="ref28">28</xref>,<xref ref-type="bibr" rid="ref30">30</xref>,<xref ref-type="bibr" rid="ref35">35</xref>,<xref ref-type="bibr" rid="ref37">37</xref>,<xref ref-type="bibr" rid="ref40">40</xref>], again with no studies (0/28) presenting high applicability concerns. Overall, the proportion of high-risk ratings was low, with most domains assessed as low risk, indicating an acceptable overall quality of the included literature.</p><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>Risk of bias and applicability concerns of the included studies regarding model development and model evaluation domains using the Prediction model Risk Of Bias ASsessment Tool for artificial intelligence tools. The colors represent the proportion of studies with low, high, or unclear risk. PROBAST+AI: Prediction model Risk Of Bias ASsessment Tool for artificial intelligence.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="jmir_v28i1e97174_fig02.png"/></fig><p>Using the GRADE framework, the certainty of evidence for the two primary diagnostic tasks was rated as moderate (<xref ref-type="table" rid="table3">Table 3</xref> and Table S6 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). 
Following the GRADE Summary of Findings format for diagnostic test accuracy, <xref ref-type="table" rid="table3">Table 3</xref> reports pooled sensitivity and specificity, expected results per 1000 tested at a 20% pretest probability, PIs, certainty ratings, and plain-language interpretation. The detailed data extraction tables and PROBAST+AI risk-of-bias tables are retained in the Supplementary Materials because of their length (Tables S4-S5 and S7-S10 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>).</p><p>In <xref ref-type="table" rid="table3">Table 3</xref>, expected results were calculated per 1000 tested at an illustrative pretest probability of 20% (200 with the target condition and 800 without); this assumed prevalence may vary across clinical settings. Diagnostic accuracy was treated as a surrogate for patient-important consequences because direct evidence on whether artificial intelligence&#x2013;assisted testing improves visual outcomes, referral burden, or treatment timing was unavailable. For age-related macular degeneration versus normal, the target condition was age-related macular degeneration; for wet age-related macular degeneration versus dry age-related macular degeneration, the target condition was wet age-related macular degeneration.</p><table-wrap id="t3" position="float"><label>Table 3.</label><caption><p>GRADE<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup> summary of findings table for deep learning&#x2013;based AMD<sup><xref ref-type="table-fn" rid="table3fn2">b</xref></sup> image classification.</p></caption><table id="table3" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Index test</td><td align="left" valign="bottom">No. 
of studies and validation datasets</td><td align="left" valign="bottom">Sensitivity (95% CI)</td><td align="left" valign="bottom">Specificity (95% CI)</td><td align="left" valign="bottom">Expected results per 1000 tested at 20% prevalence</td><td align="left" valign="bottom">95% prediction interval</td><td align="left" valign="bottom">Certainty</td><td align="left" valign="bottom">Plain-language interpretation</td></tr></thead><tbody><tr><td align="left" valign="top">DL<sup><xref ref-type="table-fn" rid="table3fn3">c</xref></sup> for AMD versus normal</td><td align="left" valign="top">27 studies [<xref ref-type="bibr" rid="ref19">19</xref>-<xref ref-type="bibr" rid="ref36">36</xref>,<xref ref-type="bibr" rid="ref38">38</xref>-<xref ref-type="bibr" rid="ref46">46</xref>] and 37 validation datasets</td><td align="left" valign="top">0.98 (0.96&#x2010;0.99)</td><td align="left" valign="top">0.98 (0.95&#x2010;0.99)</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>TP<sup><xref ref-type="table-fn" rid="table3fn4">d</xref></sup> 196</p></list-item><list-item><p>FN<sup><xref ref-type="table-fn" rid="table3fn5">e</xref></sup> 4</p></list-item><list-item><p>TN<sup><xref ref-type="table-fn" rid="table3fn6">f</xref></sup> 784</p></list-item><list-item><p>FP<sup><xref ref-type="table-fn" rid="table3fn7">g</xref></sup> 16</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Sensitivity 0.95&#x2010;0.99</p></list-item><list-item><p>Specificity 0.95&#x2010;0.99</p></list-item></list></td><td align="left" valign="top">Moderate<sup><xref ref-type="table-fn" rid="table3fn8">h</xref></sup></td><td align="left" valign="top">At a 20% pretest probability, DL would correctly identify most AMD cases and correctly rule out most non-AMD eyes or images. 
The prediction intervals indicate that performance may vary across settings.</td></tr><tr><td align="left" valign="top">DL for wAMD<sup><xref ref-type="table-fn" rid="table3fn9">i</xref></sup> versus dAMD<sup><xref ref-type="table-fn" rid="table3fn10">j</xref></sup></td><td align="left" valign="top">16 studies [<xref ref-type="bibr" rid="ref19">19</xref>,<xref ref-type="bibr" rid="ref23">23</xref>-<xref ref-type="bibr" rid="ref29">29</xref>,<xref ref-type="bibr" rid="ref31">31</xref>,<xref ref-type="bibr" rid="ref33">33</xref>,<xref ref-type="bibr" rid="ref37">37</xref>,<xref ref-type="bibr" rid="ref41">41</xref>-<xref ref-type="bibr" rid="ref44">44</xref>,<xref ref-type="bibr" rid="ref46">46</xref>] and 22 validation datasets</td><td align="left" valign="top">0.95 (0.91&#x2010;0.97)</td><td align="left" valign="top">0.95 (0.93&#x2010;0.97)</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>TP 190</p></list-item><list-item><p>FN 10</p></list-item><list-item><p>TN 760</p></list-item><list-item><p>FP 40</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Sensitivity 0.89&#x2010;0.97</p></list-item><list-item><p>Specificity 0.92&#x2010;0.97</p></list-item></list></td><td align="left" valign="top">Moderate<sup><xref ref-type="table-fn" rid="table3fn8">h</xref></sup></td><td align="left" valign="top">At a 20% pretest probability, DL would correctly identify most wAMD cases and correctly classify most dAMD cases. 
The wider prediction interval for sensitivity supports cautious local validation before deployment.</td></tr></tbody></table><table-wrap-foot><fn id="table3fn1"><p><sup>a</sup>GRADE: Grading of Recommendations, Assessment, Development, and Evaluation.</p></fn><fn id="table3fn2"><p><sup>b</sup>AMD: age-related macular degeneration.</p></fn><fn id="table3fn3"><p><sup>c</sup>DL: deep learning.</p></fn><fn id="table3fn4"><p><sup>d</sup>TP: true positive.</p></fn><fn id="table3fn5"><p><sup>e</sup>FN: false negative.</p></fn><fn id="table3fn6"><p><sup>f</sup>TN: true negative.</p></fn><fn id="table3fn7"><p><sup>g</sup>FP: false positive.</p></fn><fn id="table3fn8"><p><sup>h</sup>Downgraded one GRADE level for risk of bias because most included studies were retrospective, many relied on internal validation or incompletely reported patient-level separation or reference standards, and PROBAST+AI identified high risk of bias in a subset of validation or testing studies; detailed domain-level judgments are provided in Table S6 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p></fn><fn id="table3fn9"><p><sup>i</sup>wAMD: wet age-related macular degeneration.</p></fn><fn id="table3fn10"><p><sup>j</sup>dAMD: dry age-related macular degeneration.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s3-4"><title>Goodness-of-Fit and Model Diagnostics</title><p>The goodness-of-fit and bivariate normality of the bivariate mixed-effects regression models were evaluated through graphical inspection (Figures S1 and S2 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). The goodness-of-fit plots (Figures S1A and S2A in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>), illustrating the normal probability of deviance residuals, demonstrated that the observed data points adhered closely to the reference diagonal line, indicating a robust model fit. 
Similarly, the bivariate normality plots (Figures S1B and S2B in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>) revealed a linear alignment of data points within the chi-square probability plots, confirming that the random effects of sensitivity and specificity followed a bivariate normal distribution. Collectively, these diagnostic assessments substantiate the validity and statistical robustness of the models used in this systematic review and meta-analysis.</p></sec><sec id="s3-5"><title>DL Algorithms Versus Ophthalmologists for AMD Versus Normal Classification</title><p>The pooled diagnostic performance of image-based DL algorithms for distinguishing AMD from normal retinas was high across threshold-dependent metrics and threshold-independent discrimination. Sensitivity and specificity quantify performance at the diagnostic thresholds reported by individual studies, accuracy reflects overall correct classification in the analyzed datasets, and AUC describes discrimination across possible thresholds. The primary forest plots with PIs are shown in <xref ref-type="fig" rid="figure3">Figure 3</xref>, and the intervals should be interpreted as indicating expected between-setting variability rather than only statistical uncertainty around the pooled estimate.</p><p>Comparative analysis revealed that the pooled sensitivity of DL algorithms was significantly higher than that of senior ophthalmologists (0.98 vs 0.75; Z=4.94; <italic>P</italic>&#x003C;.001), as was the pooled accuracy (0.97 vs 0.83; Z=4.43; <italic>P</italic>&#x003C;.001). These results were detailed in <xref ref-type="fig" rid="figure4">Figure 4</xref> and Figures S5 and S6 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>. 
Notably, the comparison with junior ophthalmologists for AMD detection was limited to a single study [<xref ref-type="bibr" rid="ref24">24</xref>], precluding robust statistical inference for this subgroup.</p><fig position="float" id="figure3"><label>Figure 3.</label><caption><p>Primary forest plots for pooled sensitivity, specificity, and accuracy of deep learning algorithms for classifying age-related macular degeneration from normal retinas. Prediction intervals are printed in the plots to show expected between-setting variability [<xref ref-type="bibr" rid="ref19">19</xref>-<xref ref-type="bibr" rid="ref36">36</xref>,<xref ref-type="bibr" rid="ref38">38</xref>-<xref ref-type="bibr" rid="ref46">46</xref>].</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="jmir_v28i1e97174_fig03.png"/></fig><fig position="float" id="figure4"><label>Figure 4.</label><caption><p>Violin plots comparing the diagnostic performance of deep learning algorithms versus junior and senior ophthalmologists. The top row displays the sensitivity, specificity, and accuracy for classifying age-related macular degeneration versus normal, while the bottom row displays the performance for classifying wet age-related macular degeneration versus dry age-related macular degeneration. The scattered dots represent individual study estimates, and the internal box plots indicate the median and interquartile range. <italic>P</italic> values indicate the statistical significance of the comparisons. 
AMD: age-related macular degeneration; dAMD: dry age-related macular degeneration; DL: deep learning; wAMD: wet age-related macular degeneration.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="jmir_v28i1e97174_fig04.png"/></fig></sec><sec id="s3-6"><title>DL Algorithms Versus Ophthalmologists for wAMD Versus dAMD Classification</title><p>For the classification of wAMD versus dAMD, DL algorithms again showed high pooled sensitivity, specificity, accuracy, and AUC, but these metrics describe different behaviors. Accuracy summarizes correct classification within the included datasets, whereas AUC reflects discrimination across thresholds and may remain high even when real-world threshold selection, disease spectrum, or image quality differs. The primary forest plots with PIs are shown in <xref ref-type="fig" rid="figure5">Figure 5</xref>.</p><p>Comparative analyses showed that, within the limited head-to-head datasets, DL algorithms had higher pooled specificity (0.95 vs 0.53; Z=3.49; <italic>P</italic>&#x003C;.001) and diagnostic accuracy (0.95 vs 0.75; Z=6.48; <italic>P</italic>&#x003C;.001) than junior ophthalmologists. Compared with senior ophthalmologists, DL algorithms had higher pooled sensitivity (0.95 vs 0.67; Z=2.58; <italic>P</italic>=.009) and diagnostic accuracy (0.95 vs 0.88; Z=2.90; <italic>P</italic>=.003). All comparisons are shown in <xref ref-type="fig" rid="figure4">Figure 4</xref> and Figures S5 and S6 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>. 
These findings suggest a possible relative performance advantage for DL in selected metrics, but the small number of clinician-comparison studies means that the results should be interpreted as preliminary rather than definitive.</p><fig position="float" id="figure5"><label>Figure 5.</label><caption><p>Primary forest plots for pooled sensitivity, specificity, and accuracy of deep learning algorithms for classifying wet age-related macular degeneration from dry age-related macular degeneration. Prediction intervals are printed in the plots to show expected between-setting variability [<xref ref-type="bibr" rid="ref19">19</xref>,<xref ref-type="bibr" rid="ref23">23</xref>-<xref ref-type="bibr" rid="ref29">29</xref>,<xref ref-type="bibr" rid="ref31">31</xref>,<xref ref-type="bibr" rid="ref33">33</xref>,<xref ref-type="bibr" rid="ref37">37</xref>,<xref ref-type="bibr" rid="ref41">41</xref>-<xref ref-type="bibr" rid="ref44">44</xref>,<xref ref-type="bibr" rid="ref46">46</xref>].</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="jmir_v28i1e97174_fig05.png"/></fig></sec><sec id="s3-7"><title>Subgroup Analysis for AMD Versus Normal Classification</title><p>Subgroup analysis based on imaging modality, as illustrated in <xref ref-type="fig" rid="figure6">Figure 6</xref> and Figures S9-S17 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>, revealed statistically significant performance differences among DL algorithms. Specifically, OCT-based DL algorithms demonstrated significantly higher pooled specificity compared to CFP-based algorithms (0.99 vs 0.94; Z=2.92; <italic>P</italic>=.003). 
Furthermore, OCT-based algorithms achieved significantly higher pooled accuracy (0.99 vs 0.94; Z=3.36; <italic>P</italic>&#x003C;.001) and a significantly higher pooled AUC value (1.00 vs 0.98; Z=3.50; <italic>P</italic>&#x003C;.001) than CFP-based DL algorithms.</p><fig position="float" id="figure6"><label>Figure 6.</label><caption><p>Subgroup analysis of deep learning algorithms based on imaging modalities. The violin plots illustrate the distribution of sensitivity, specificity, accuracy, and area under the curve for optical coherence tomography-based, color fundus photography-based, and multimodal models in classifying age-related macular degeneration versus normal (top row) and wet age-related macular degeneration versus dry age-related macular degeneration (bottom row). The scattered dots represent individual study estimates, and the internal box plots indicate the median and interquartile range. <italic>P</italic> values indicate the statistical significance of the comparisons. AMD: age-related macular degeneration; AUC: area under the curve; CFP: color fundus photography; dAMD: dry age-related macular degeneration; DL: deep learning; OCT: optical coherence tomography; wAMD: wet age-related macular degeneration.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="jmir_v28i1e97174_fig06.png"/></fig></sec><sec id="s3-8"><title>Subgroup Analysis for wAMD Versus dAMD Classification</title><p>The subgroup analysis by imaging modality for wAMD versus dAMD classification was presented in <xref ref-type="fig" rid="figure6">Figure 6</xref> and Figures S18-S27 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>. OCT-based DL algorithms demonstrated significantly higher pooled sensitivity than CFP-based algorithms (0.97 vs 0.89; Z=2.25; <italic>P</italic>=.02). 
Their pooled accuracy was also significantly higher (0.97 vs 0.91; Z=2.73; <italic>P</italic>=.006), as was their pooled AUC value (0.99 vs 0.97; Z=2.17; <italic>P</italic>=.02).</p></sec><sec id="s3-9"><title>Heterogeneity Investigation: Bivariate Boxplot and Meta-Regression</title><p>To explore the substantial statistical heterogeneity observed, meta-regression and bivariate boxplot analyses were conducted. For the AMD versus normal classification, meta-regression indicated that the type of validation (internal validation vs external validation), database source (open database vs private database), and study centers (single center vs multicenter) were potential sources of heterogeneity (Table S12 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). The bivariate boxplot suggested that the studies by Skevas et al [<xref ref-type="bibr" rid="ref21">21</xref>], Celebi et al [<xref ref-type="bibr" rid="ref26">26</xref>], Grassmann et al [<xref ref-type="bibr" rid="ref31">31</xref>], and Yoo et al [<xref ref-type="bibr" rid="ref43">43</xref>] might be influential outliers contributing to the heterogeneity (<xref ref-type="fig" rid="figure7">Figure 7A</xref>).</p><p>For the wAMD versus dAMD classification, meta-regression identified the type of validation (internal validation vs external validation), type of imaging (unimodal vs multimodal), database source (open database vs private database), and study centers (single center vs multicenter) as potential moderators explaining heterogeneity (Table S13 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). 
The corresponding bivariate boxplot highlighted the studies by El-Den et al [<xref ref-type="bibr" rid="ref29">29</xref>], Le et al [<xref ref-type="bibr" rid="ref33">33</xref>], and Wang et al [<xref ref-type="bibr" rid="ref42">42</xref>] as potential outliers influencing the pooled estimates (<xref ref-type="fig" rid="figure7">Figure 7B</xref>).</p><fig position="float" id="figure7"><label>Figure 7.</label><caption><p>Assessment of heterogeneity and small-study effects. (A and B) Bivariate boxplots identifying potential outliers and influential studies for age-related macular degeneration versus normal and wet age-related macular degeneration versus dry age-related macular degeneration classifications. Studies falling outside the colored ellipses are considered outliers. (C and D) Deeks&#x2019; funnel plots evaluating small-study effects for the two classification tasks; a <italic>P</italic> value &#x003E;.10 indicates no significant small-study effects. AMD: age-related macular degeneration; dAMD: dry age-related macular degeneration; wAMD: wet age-related macular degeneration [<xref ref-type="bibr" rid="ref21">21</xref>,<xref ref-type="bibr" rid="ref26">26</xref>,<xref ref-type="bibr" rid="ref29">29</xref>,<xref ref-type="bibr" rid="ref31">31</xref>,<xref ref-type="bibr" rid="ref33">33</xref>,<xref ref-type="bibr" rid="ref42">42</xref>,<xref ref-type="bibr" rid="ref43">43</xref>].</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="jmir_v28i1e97174_fig07.png"/></fig></sec><sec id="s3-10"><title>Sensitivity Analysis</title><p>Sensitivity analyses were conducted to assess the robustness of our findings. 
Whether studies flagged as high risk in the validation set by the PROBAST+AI tool were excluded or the identified outliers and influential data points were removed, the diagnostic performance estimates remained stable (Tables S14 and S15 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). The results remained consistent with the primary analysis, demonstrating that the conclusions are robust and not disproportionately driven by these extreme studies.</p></sec><sec id="s3-11"><title>Small-Study Effects and Clinical Applicability</title><p>Deeks&#x2019; funnel plot asymmetry test indicated no evidence of significant small-study effects for either AMD classification task (<italic>P</italic>=.26 and <italic>P</italic>=.18; <xref ref-type="fig" rid="figure7">Figures 7C and 7D</xref>). Assuming a pretest probability of 20%, Fagan nomogram analysis demonstrated that a positive DL test result increased the posttest probability to 91% for AMD versus normal and 84% for wAMD versus dAMD, while a negative result reduced it to 1% for both tasks (<xref ref-type="fig" rid="figure8">Figures 8A and 8B</xref>).</p><fig position="float" id="figure8"><label>Figure 8.</label><caption><p>Fagan&#x2019;s nomograms evaluating the clinical utility of deep learning algorithms. (A) Clinical utility for classifying age-related macular degeneration versus normal. (B) Clinical utility for classifying wet age-related macular degeneration versus dry age-related macular degeneration. The left axis represents the pretest probability (set at 20%), the middle axis represents the likelihood ratio, and the right axis represents the posttest probability. 
AMD: age-related macular degeneration; dAMD: dry age-related macular degeneration; wAMD: wet age-related macular degeneration.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="jmir_v28i1e97174_fig08.png"/></fig></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><p>In relation to our objective of comparing DL algorithms with ophthalmologists and identifying factors that influence diagnostic performance, the main finding is that DL models showed strong pooled performance for both AMD detection and wAMD versus dAMD classification, but the strength of this evidence differs across comparisons. The clinician comparisons suggest a possible role for DL as a consistent decision-support baseline, yet the sparse junior-ophthalmologist data and wide between-study variability mean that these findings should be interpreted as hypothesis-generating rather than definitive for deployment.</p><p>Our meta-analysis reveals a deployment-relevant pattern. Within the analyzed datasets, DL algorithms demonstrated significantly higher pooled sensitivity and accuracy compared to the available metrics for senior ophthalmologists in distinguishing AMD from normal controls; however, no significant differences were observed between DL and junior ophthalmologists across any diagnostic metrics. The higher pooled performance of DL suggests that these models possess an enhanced capability to detect subtle, pixel-level morphological changes and nonlinear feature interactions, such as early exudative signs, that may elude the visual inspection of even the most experienced clinicians [<xref ref-type="bibr" rid="ref20">20</xref>,<xref ref-type="bibr" rid="ref24">24</xref>,<xref ref-type="bibr" rid="ref36">36</xref>]. Conversely, the lower sensitivity observed in senior ophthalmologists appears to be behaviorally driven by specific decision thresholds rather than skill deficiencies. 
As evidenced by the data from studies such as Matsuba et al [<xref ref-type="bibr" rid="ref20">20</xref>], Bao et al [<xref ref-type="bibr" rid="ref24">24</xref>], and Oliveira et al [<xref ref-type="bibr" rid="ref36">36</xref>], senior experts exhibited a distinct preference for conservative diagnostic thresholds, achieving high specificity (summary 0.93) but notably reduced sensitivity (summary 0.75). This indicates a clinical preference for &#x201C;rule-in&#x201D; strategies to strictly avoid false positives, a constraint that DL algorithms do not possess. However, the unexpected parity between DL algorithms and junior practitioners should be interpreted with caution; this finding is attributable to data sparsity rather than clinical equivalence, as the analysis is restricted to only one study for AMD detection and two studies for wAMD classification, introducing a high risk of small-sample bias [<xref ref-type="bibr" rid="ref20">20</xref>,<xref ref-type="bibr" rid="ref24">24</xref>,<xref ref-type="bibr" rid="ref36">36</xref>]. Similarly, the comparisons with senior ophthalmologists, while more robust, remain limited in number (three studies for AMD detection, two for wAMD classification), and the resulting estimates should be considered preliminary rather than definitive. This data-driven limitation, however, stands in contrast to the wAMD versus dAMD task, where sufficient head-to-head comparisons (two studies) enabled a stratified analysis, revealing distinct behavioral patterns across experience levels [<xref ref-type="bibr" rid="ref20">20</xref>,<xref ref-type="bibr" rid="ref24">24</xref>,<xref ref-type="bibr" rid="ref36">36</xref>].</p><p>The very high AUC values observed in our analysis warrant careful interpretation and should not be equated with flawless real-world diagnostic performance. 
Restricted test distributions, curated image quality, internal validation, repeated use of public datasets, model selection based on the best-performing algorithm, and insufficiently documented patient-level splitting may inflate discrimination [<xref ref-type="bibr" rid="ref25">25</xref>,<xref ref-type="bibr" rid="ref38">38</xref>,<xref ref-type="bibr" rid="ref43">43</xref>,<xref ref-type="bibr" rid="ref46">46</xref>]. Potential data leakage cannot be excluded in studies that did not clearly separate patients, eyes, or images across training, validation, and test sets. Therefore, AUCs close to 1.0 indicate excellent discrimination within the analyzed datasets, not proof of flawless performance in prospective clinical workflows [<xref ref-type="bibr" rid="ref47">47</xref>,<xref ref-type="bibr" rid="ref48">48</xref>].</p><p>In the clinically critical task of differentiating wAMD from dAMD, automated DL systems demonstrated robustness across varying datasets. Our results indicated that the pooled metrics of DL algorithms not only showed higher specificity and accuracy compared to junior ophthalmologists but also indicated higher sensitivity and accuracy relative to senior ophthalmologists. Rather than framing this as DL mitigating an &#x201C;experience gap&#x201D; or correcting specific human errors, these findings suggest that DL algorithms offer a more consistent and objective diagnostic baseline that balances sensitivity and specificity. These findings advocate for a collaborative clinical paradigm: DL algorithms could serve as a triage filter to enhance specificity for primary care providers while functioning as a high-sensitivity &#x201C;second reader&#x201D; for specialists resolving equivocal wAMD cases [<xref ref-type="bibr" rid="ref49">49</xref>].</p><p>Interestingly, our subgroup analysis highlights the relatively consistent performance of OCT-based models for automated AMD classification. 
OCT-based models significantly outperformed CFP-based approaches, driven by the capture of pathognomonic cross-sectional features&#x2014;such as intraretinal fluid and pigment epithelial detachment&#x2014;that are often obscured in 2D fundus photography [<xref ref-type="bibr" rid="ref50">50</xref>]. Intriguingly, multimodal DL (OCT + CFP) did not significantly surpass standalone OCT models. This suggests a &#x201C;saturation effect,&#x201D; where the rich structural data of OCT capture the vast majority of diagnostic signals, rendering the incremental value of CFP marginal [<xref ref-type="bibr" rid="ref35">35</xref>]. In practice, the underperformance of multimodal models relative to standalone OCT may also stem from feature redundancy and fusion noise [<xref ref-type="bibr" rid="ref51">51</xref>]; when OCT and CFP capture substantially overlapping diagnostic information [<xref ref-type="bibr" rid="ref43">43</xref>], their combination can paradoxically introduce variance through misaligned spatial features, registration errors, and conflicting feature representations, ultimately degrading rather than enhancing the decision boundary. From a translational perspective, this finding is pivotal; it implies that the computational cost and technical challenges of multimodal alignment (eg, fusion noise and registration errors) may currently outweigh the clinical benefits [<xref ref-type="bibr" rid="ref51">51</xref>,<xref ref-type="bibr" rid="ref52">52</xref>]. Therefore, unless fusion strategies are substantially optimized, OCT-based workflows currently appear to be a practical foundation for clinical deployment [<xref ref-type="bibr" rid="ref35">35</xref>].</p><p>Building upon the baselines established by previous meta-analyses, this systematic review advances the understanding of DL in AMD diagnosis. In 2023, Leng et al [<xref ref-type="bibr" rid="ref9">9</xref>] reported a pooled sensitivity of 94% and specificity of 97% for convolutional neural network algorithms. 
More recently, Chen et al [<xref ref-type="bibr" rid="ref27">27</xref>] highlighted the superiority of AI over retinal specialists. Our analysis incorporates the latest studies using advanced architectures, such as Vision Transformers. This inclusion yields modestly higher pooled metrics, reflecting the field&#x2019;s technological maturation [<xref ref-type="bibr" rid="ref33">33</xref>]. Most significantly, this systematic review distinguishes itself through four methodological innovations that enhance clinical relevance: (1) a stratified comparison of AI versus ophthalmologists, explicitly differentiating by experience level; (2) the application of the PROBAST+AI tool for bias assessment, complemented by the GRADE framework; (3) a rigorous subgroup analysis by imaging modality (OCT, CFP, multimodal) to isolate technical performance drivers; and (4) a granular evaluation extending beyond binary detection to the specific classification of wAMD versus dAMD. Collectively, these advancements establish a more robust evidence base than prior reviews.</p><p>Heterogeneity is central to the interpretation of these findings [<xref ref-type="bibr" rid="ref53">53</xref>]. Extreme between-study variability should not be treated only as a statistical descriptor; it indicates that pooled estimates may not transfer reliably to clinics with different devices, acquisition protocols, labeling rules, disease spectra, or patient populations [<xref ref-type="bibr" rid="ref53">53</xref>-<xref ref-type="bibr" rid="ref55">55</xref>]. By using a bivariate random-effects model and multivariable meta-regression, we identified that differences in validation strategies (internal vs external), database sources (open vs private), and study settings (single-center vs multicenter) significantly influence diagnostic performance. 
Studies relying solely on internal validation frequently reported inflated metrics, illustrating a generalization gap when models face domain shifts in image acquisition or demographics [<xref ref-type="bibr" rid="ref50">50</xref>]. Similarly, single-center studies risk overfitting to center-specific features arising from uniform protocols, whereas multicenter designs typically demonstrate greater robustness through exposure to diverse image qualities [<xref ref-type="bibr" rid="ref50">50</xref>]. Consequently, our analysis suggests that database diversity and annotation quality are likely more critical determinants of generalizability than mere data accessibility. Furthermore, specific outliers in the bivariate boxplot (Skevas et al [<xref ref-type="bibr" rid="ref21">21</xref>], Celebi et al [<xref ref-type="bibr" rid="ref26">26</xref>], Grassmann et al [<xref ref-type="bibr" rid="ref31">31</xref>], El-Den et al [<xref ref-type="bibr" rid="ref29">29</xref>], Le et al [<xref ref-type="bibr" rid="ref33">33</xref>], Wang et al [<xref ref-type="bibr" rid="ref42">42</xref>], and Yoo et al [<xref ref-type="bibr" rid="ref43">43</xref>]) highlight how methodological divergences, such as algorithm architecture and data curation strategies, can materially affect performance. This indicates that future improvements in AI reliability will depend less on novel model architectures and more on the curation of diverse, multicenter external validation datasets. The substantial heterogeneity observed in this systematic review and meta-analysis warrants careful interpretation. Rather than reflecting routine statistical noise, this level of heterogeneity signals the pooling of fundamentally diverse data sources. 
Specifically, the included studies used different imaging hardware (eg, Heidelberg Spectralis, Topcon, Zeiss Cirrus OCT devices; various fundus camera systems), acquisition protocols (varying image resolutions, fields of view, and scan patterns), and ground-truth labeling methodologies (ranging from consensus grading by multiple retinal specialists to single-expert annotation or semi-automated classification systems) [<xref ref-type="bibr" rid="ref32">32</xref>,<xref ref-type="bibr" rid="ref35">35</xref>,<xref ref-type="bibr" rid="ref45">45</xref>]. These technical and methodological differences fundamentally influence the feature space available to DL algorithms and likely account for much of the observed heterogeneity. The PIs (eg, sensitivity: 0.95&#x2010;0.99; specificity: 0.95&#x2010;0.99 for distinguishing AMD from normal retinas; sensitivity: 0.89&#x2010;0.97; specificity: 0.92&#x2010;0.97 for classifying wAMD vs dAMD) further underscore that the average pooled performance, while encouraging, may not be representative of performance in any individual deployment setting. This finding has important implications for clinical deployment: site-specific validation using local imaging equipment and patient populations remains essential before implementing any DL-based AMD screening system.</p><p>These heterogeneity findings provide concrete guidance for future study design [<xref ref-type="bibr" rid="ref56">56</xref>]. Investigators should prioritize external validation on datasets from institutions and populations distinct from the training data [<xref ref-type="bibr" rid="ref53">53</xref>], use multicenter designs incorporating diverse imaging devices and acquisition protocols, report patient-level separation between training and testing data, and stratify performance by imaging device, acquisition protocol, labeling method, and patient demographics [<xref ref-type="bibr" rid="ref56">56</xref>]. 
Because PIs indicate that local performance may differ from pooled estimates, summary metrics alone should not be used as a deployment decision rule [<xref ref-type="bibr" rid="ref54">54</xref>].</p><p>Translating these findings into practice, DL algorithms exhibit the potential to augment the diagnostic workflow rather than replace it. The superior accuracy of DL in classifying wAMD versus dAMD suggests potential use in resource-limited settings or tele-ophthalmology screening. However, considering current algorithms are primarily trained on isolated OCT or CFP images, they often lack integration with other imaging modalities or clinical parameters; future models should therefore evaluate multimodal imaging and patient clinical contexts to emulate comprehensive diagnoses [<xref ref-type="bibr" rid="ref57">57</xref>]. Beyond these technical and clinical considerations, significant implementation barriers persist, including the scarcity of expert-annotated data, regulatory hurdles, and technical challenges regarding data availability, model interpretability, transparency, and generalization capability [<xref ref-type="bibr" rid="ref36">36</xref>,<xref ref-type="bibr" rid="ref52">52</xref>,<xref ref-type="bibr" rid="ref58">58</xref>]. Advances in few-shot learning, self-supervised models, and centralized platforms may support a more integrated AI ecosystem, requiring sustained multidisciplinary efforts to optimize AI safety and support safe clinical practice [<xref ref-type="bibr" rid="ref52">52</xref>,<xref ref-type="bibr" rid="ref58">58</xref>].</p><p>Beyond diagnostic performance metrics, the successful clinical translation of DL algorithms requires addressing practical implementation challenges. 
These include seamless integration into existing electronic health record systems and ophthalmic imaging workflows, real-time processing capabilities compatible with clinical time constraints, and intuitive user interfaces that present AI-generated results in a manner that supports rather than disrupts clinical decision-making [<xref ref-type="bibr" rid="ref46">46</xref>]. Furthermore, clinician trust and acceptance&#x2014;shaped by model interpretability, transparency of AI reasoning, and consistent performance across diverse clinical scenarios&#x2014;are prerequisites for successful adoption [<xref ref-type="bibr" rid="ref23">23</xref>,<xref ref-type="bibr" rid="ref24">24</xref>]. Future validation of DL tools must therefore extend beyond accuracy benchmarks to encompass usability studies, clinician acceptance evaluations, and workflow efficiency assessments in real-world clinical settings [<xref ref-type="bibr" rid="ref41">41</xref>].</p><p>Our findings should be interpreted in light of several limitations. First, the predominance of retrospective study designs (26 of 28 included studies) represents a fundamental limitation that must be carefully considered when interpreting the strong pooled performance metrics. Retrospective datasets are typically curated from clinical archives, which may systematically exclude poor-quality images, atypical presentations, and diagnostically challenging cases that are routinely encountered in prospective clinical workflows. This selection inherently inflates the apparent diagnostic performance and limits the generalizability of our findings to real-world screening and clinical deployment settings [<xref ref-type="bibr" rid="ref48">48</xref>]. Second, to address potential patient overlap and maintain statistical independence, we extracted performance metrics exclusively from the primary AI algorithm within each study, omitting data from suboptimal models. 
While methodologically sound for meta-analysis, this approach inherently reflects a &#x201C;best-case scenario&#x201D; that likely inflates the pooled performance estimates compared to average algorithmic performance. This reporting bias is an inherent limitation of the current DL literature in ophthalmology and should be carefully considered by clinicians and policymakers when interpreting these results for clinical implementation decisions [<xref ref-type="bibr" rid="ref12">12</xref>]. Future research should therefore granularly evaluate performance variances across different algorithmic architectures, including less optimal models, to ensure a more balanced and realistic assessment of the DL landscape. Third, direct head-to-head comparisons between DL algorithms and ophthalmologists were few, particularly for the AMD versus normal task where only one study provided junior ophthalmologist data, limiting the statistical power of these specific subgroups [<xref ref-type="bibr" rid="ref50">50</xref>]. Future research must prioritize prospective, multicenter trials with prespecified human comparison arms to definitively validate these retrospective results [<xref ref-type="bibr" rid="ref12">12</xref>].</p><p>In conclusion, this systematic review suggests that, compared with ophthalmologists, DL algorithms demonstrate superior and more balanced diagnostic performance for AMD image classification, providing a consistent decision-support baseline that mitigates the threshold-dependent trade-offs observed in human graders. However, these relative-performance findings remain preliminary because head-to-head evidence is sparse, especially for junior ophthalmologists, and because wide PIs, high heterogeneity, retrospective designs, and possible inflation from restricted datasets, internal validation, or leakage limit clinical transportability. 
DL systems should therefore be locally calibrated and prospectively validated as triage adjuncts rather than autonomous replacements. Before implementation, prospective multicenter studies should test representative patients, use strict patient-level external validation [<xref ref-type="bibr" rid="ref56">56</xref>], include prespecified human comparison arms [<xref ref-type="bibr" rid="ref59">59</xref>], and evaluate workflow integration, interpretability, and safety [<xref ref-type="bibr" rid="ref60">60</xref>].</p></sec></body><back><ack><p>The authors declare the use of generative artificial intelligence (GenAI) in the research and writing process. According to the GAIDeT taxonomy (2025), the following tasks were delegated to GenAI tools under full human supervision:</p><p/><p>- Text generation</p><p>- Proofreading and editing</p><p>- Summarizing text</p><p>- Formulation of conclusions</p><p>- Translation</p><p>- Reformatting</p><p/><p>The GenAI tool used was: OpenAI Codex (GPT-5).</p><p>Responsibility for the final manuscript lies entirely with the authors.</p><p>GenAI tools are not listed as authors and do not bear responsibility for the final outcomes.</p><p>Declaration submitted by: The primary author (YZ)</p></ack><notes><sec><title>Funding</title><p>The authors declared no financial support was received for this work.</p></sec><sec><title>Data Availability</title><p>All data generated or analyzed during this systematic review and meta-analysis are included in this published article and its supplementary information files.</p></sec></notes><fn-group><fn fn-type="con"><p>Conceptualization, methodology, software, formal analysis, investigation (screening, quality assessment, and data extraction), data curation, writing &#x2013; original draft, visualization: YZ</p><p>Investigation (literature search and screening): YN</p><p>Investigation (study selection): SYS</p><p>Investigation (quality assessment): WL</p><p>Data curation, Investigation (data extraction): 
YD</p><p>Supervision, Validation, Writing &#x2013; review and editing: YG</p><p>All authors read and approved the final manuscript.</p></fn><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">AI</term><def><p>artificial intelligence</p></def></def-item><def-item><term id="abb2">AMD</term><def><p>age-related macular degeneration</p></def></def-item><def-item><term id="abb3">AUC</term><def><p>area under the curve</p></def></def-item><def-item><term id="abb4">CFP</term><def><p>color fundus photography</p></def></def-item><def-item><term id="abb5">dAMD</term><def><p>dry age-related macular degeneration</p></def></def-item><def-item><term id="abb6">DL</term><def><p>deep learning</p></def></def-item><def-item><term id="abb7">FN</term><def><p>false negative</p></def></def-item><def-item><term id="abb8">FP</term><def><p>false positive</p></def></def-item><def-item><term id="abb9">GRADE</term><def><p>Grading of Recommendations, Assessment, Development, and Evaluations</p></def></def-item><def-item><term id="abb10">OCT</term><def><p>optical coherence tomography</p></def></def-item><def-item><term id="abb11">PI</term><def><p>prediction interval</p></def></def-item><def-item><term id="abb12">PITROS</term><def><p>Patient, Index test, Target condition, Reference standard, Outcome, and Setting</p></def></def-item><def-item><term id="abb13">PRISMA</term><def><p>Preferred Reporting Items for Systematic Reviews and Meta-Analyses</p></def></def-item><def-item><term id="abb14">PRISMA-DTA</term><def><p>Preferred Reporting Items for Systematic Reviews and Meta-Analyses of Diagnostic Test Accuracy</p></def></def-item><def-item><term id="abb15">PROBAST+AI</term><def><p>Prediction model Risk Of Bias Assessment Tool for Artificial Intelligence</p></def></def-item><def-item><term id="abb16">PROSPERO</term><def><p>International Prospective Register of Systematic Reviews</p></def></def-item><def-item><term 
id="abb17">TN</term><def><p>true negative</p></def></def-item><def-item><term id="abb18">TP</term><def><p>true positive</p></def></def-item><def-item><term id="abb19">wAMD</term><def><p>wet age-related macular degeneration</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Jeong</surname><given-names>YD</given-names> </name><name name-style="western"><surname>Park</surname><given-names>S</given-names> </name><name name-style="western"><surname>Kim</surname><given-names>MS</given-names> </name><etal/></person-group><article-title>Global burden of vision impairment due to age-related macular degeneration, 1990&#x2013;2021, with forecasts to 2050: a systematic analysis for the Global Burden of Disease Study 2021</article-title><source>Lancet Glob Health</source><year>2025</year><month>07</month><volume>13</volume><issue>7</issue><fpage>e1175</fpage><lpage>e1190</lpage><pub-id pub-id-type="doi">10.1016/S2214-109X(25)00143-3</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Marchesi</surname><given-names>N</given-names> </name><name name-style="western"><surname>Capierri</surname><given-names>M</given-names> </name><name name-style="western"><surname>Pascale</surname><given-names>A</given-names> </name><name name-style="western"><surname>Barbieri</surname><given-names>A</given-names> </name></person-group><article-title>Different therapeutic approaches for dry and wet AMD</article-title><source>Int J Mol Sci</source><year>2024</year><month>12</month><day>4</day><volume>25</volume><issue>23</issue><fpage>13053</fpage><pub-id pub-id-type="doi">10.3390/ijms252313053</pub-id><pub-id pub-id-type="medline">39684764</pub-id></nlm-citation></ref><ref 
id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Midena</surname><given-names>E</given-names> </name><name name-style="western"><surname>Frizziero</surname><given-names>L</given-names> </name><name name-style="western"><surname>Torresin</surname><given-names>T</given-names> </name><name name-style="western"><surname>Boscolo Todaro</surname><given-names>P</given-names> </name><name name-style="western"><surname>Miglionico</surname><given-names>G</given-names> </name><name name-style="western"><surname>Pilotto</surname><given-names>E</given-names> </name></person-group><article-title>Optical coherence tomography and color fundus photography in the screening of age-related macular degeneration: a comparative, population-based study</article-title><source>PLoS ONE</source><year>2020</year><volume>15</volume><issue>8</issue><fpage>e0237352</fpage><pub-id pub-id-type="doi">10.1371/journal.pone.0237352</pub-id><pub-id pub-id-type="medline">32797085</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Trinh</surname><given-names>M</given-names> </name><name name-style="western"><surname>Cheung</surname><given-names>R</given-names> </name><name name-style="western"><surname>Nam</surname><given-names>J</given-names> </name><name name-style="western"><surname>Ng</surname><given-names>D</given-names> </name><name name-style="western"><surname>Nivison-Smith</surname><given-names>L</given-names> </name><name name-style="western"><surname>Ly</surname><given-names>A</given-names> </name></person-group><article-title>High risk does not guarantee high accuracy-evaluating the prognostic accuracy of OCT biomarkers for predicting late AMD</article-title><source>Ophthalmic Physiol 
Opt</source><year>2025</year><month>09</month><volume>45</volume><issue>6</issue><fpage>1293</fpage><lpage>1301</lpage><pub-id pub-id-type="doi">10.1111/opo.13547</pub-id><pub-id pub-id-type="medline">40557781</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Moradi</surname><given-names>M</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Du</surname><given-names>X</given-names> </name><name name-style="western"><surname>Seddon</surname><given-names>JM</given-names> </name></person-group><article-title>Deep ensemble learning for automated non-advanced AMD classification using optimized retinal layer segmentation and SD-OCT scans</article-title><source>Comput Biol Med</source><year>2023</year><month>03</month><volume>154</volume><issue>106512</issue><fpage>106512</fpage><pub-id pub-id-type="doi">10.1016/j.compbiomed.2022.106512</pub-id><pub-id pub-id-type="medline">36701964</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Neri</surname><given-names>G</given-names> </name><name name-style="western"><surname>Rebecchi</surname><given-names>C</given-names> </name><name name-style="western"><surname>Oakley</surname><given-names>JD</given-names> </name><etal/></person-group><article-title>Deep learning model for automated classification of macular neovascularization subtypes in AMD</article-title><source>Invest Ophthalmol Vis Sci</source><year>2025</year><month>07</month><day>1</day><volume>66</volume><issue>9</issue><fpage>55</fpage><pub-id pub-id-type="doi">10.1167/iovs.66.9.55</pub-id><pub-id pub-id-type="medline">40689724</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation 
citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Liu</surname><given-names>X</given-names> </name><name name-style="western"><surname>Faes</surname><given-names>L</given-names> </name><name name-style="western"><surname>Kale</surname><given-names>AU</given-names> </name><etal/></person-group><article-title>A comparison of deep learning performance against health-care professionals in detecting diseases from medical imaging: a systematic review and meta-analysis</article-title><source>Lancet Digit Health</source><year>2019</year><month>10</month><volume>1</volume><issue>6</issue><fpage>e271</fpage><lpage>e297</lpage><pub-id pub-id-type="doi">10.1016/S2589-7500(19)30123-2</pub-id><pub-id pub-id-type="medline">33323251</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Pandey</surname><given-names>PU</given-names> </name><name name-style="western"><surname>Ballios</surname><given-names>BG</given-names> </name><name name-style="western"><surname>Christakis</surname><given-names>PG</given-names> </name><etal/></person-group><article-title>Ensemble of deep convolutional neural networks is more accurate and reliable than board-certified ophthalmologists at detecting multiple diseases in retinal fundus photographs</article-title><source>Br J Ophthalmol</source><year>2024</year><month>02</month><day>21</day><volume>108</volume><issue>3</issue><fpage>417</fpage><lpage>423</lpage><pub-id pub-id-type="doi">10.1136/bjo-2022-322183</pub-id><pub-id pub-id-type="medline">36720585</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Leng</surname><given-names>X</given-names> </name><name name-style="western"><surname>Shi</surname><given-names>R</given-names> </name><name 
name-style="western"><surname>Wu</surname><given-names>Y</given-names> </name><etal/></person-group><article-title>Deep learning for detection of age-related macular degeneration: a systematic review and meta-analysis of diagnostic test accuracy studies</article-title><source>PLoS ONE</source><year>2023</year><volume>18</volume><issue>4</issue><fpage>e0284060</fpage><pub-id pub-id-type="doi">10.1371/journal.pone.0284060</pub-id><pub-id pub-id-type="medline">37023082</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Chen</surname><given-names>KY</given-names> </name><name name-style="western"><surname>Chan</surname><given-names>HC</given-names> </name><name name-style="western"><surname>Chan</surname><given-names>CM</given-names> </name></person-group><article-title>Can artificial intelligence with multimodal imaging outperform traditional methods in predicting age-related macular degeneration progression? 
A systematic review and exploratory meta-analysis</article-title><source>BMC Med Inform Decis Mak</source><year>2025</year><month>09</month><day>1</day><volume>25</volume><issue>1</issue><fpage>321</fpage><pub-id pub-id-type="doi">10.1186/s12911-025-03119-z</pub-id><pub-id pub-id-type="medline">40890721</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Moons</surname><given-names>KGM</given-names> </name><name name-style="western"><surname>Damen</surname><given-names>JAA</given-names> </name><name name-style="western"><surname>Kaul</surname><given-names>T</given-names> </name><etal/></person-group><article-title>PROBAST+AI: an updated quality, risk of bias, and applicability assessment tool for prediction models using regression or artificial intelligence methods</article-title><source>BMJ</source><year>2025</year><month>03</month><day>24</day><volume>388</volume><fpage>e082505</fpage><pub-id pub-id-type="doi">10.1136/bmj-2024-082505</pub-id><pub-id pub-id-type="medline">40127903</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Vrudhula</surname><given-names>A</given-names> </name><name name-style="western"><surname>Kwan</surname><given-names>AC</given-names> </name><name name-style="western"><surname>Ouyang</surname><given-names>D</given-names> </name><name name-style="western"><surname>Cheng</surname><given-names>S</given-names> </name></person-group><article-title>Machine learning and bias in medical imaging: opportunities and challenges</article-title><source>Circ Cardiovasc Imaging</source><year>2024</year><month>02</month><volume>17</volume><issue>2</issue><fpage>e015495</fpage><pub-id pub-id-type="doi">10.1161/CIRCIMAGING.123.015495</pub-id><pub-id 
pub-id-type="medline">38377237</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Salameh</surname><given-names>JP</given-names> </name><name name-style="western"><surname>Bossuyt</surname><given-names>PM</given-names> </name><name name-style="western"><surname>McGrath</surname><given-names>TA</given-names> </name><etal/></person-group><article-title>Preferred reporting items for systematic review and meta-analysis of diagnostic test accuracy studies (PRISMA-DTA): explanation, elaboration, and checklist</article-title><source>BMJ</source><year>2020</year><month>08</month><day>14</day><volume>370</volume><fpage>m2632</fpage><pub-id pub-id-type="doi">10.1136/bmj.m2632</pub-id><pub-id pub-id-type="medline">32816740</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Arends</surname><given-names>LR</given-names> </name><name name-style="western"><surname>Hamza</surname><given-names>TH</given-names> </name><name name-style="western"><surname>van Houwelingen</surname><given-names>JC</given-names> </name><name name-style="western"><surname>Heijenbrok-Kal</surname><given-names>MH</given-names> </name><name name-style="western"><surname>Hunink</surname><given-names>MGM</given-names> </name><name name-style="western"><surname>Stijnen</surname><given-names>T</given-names> </name></person-group><article-title>Bivariate random effects meta-analysis of ROC curves</article-title><source>Med Decis Making</source><year>2008</year><volume>28</volume><issue>5</issue><fpage>621</fpage><lpage>638</lpage><pub-id pub-id-type="doi">10.1177/0272989X08319957</pub-id><pub-id pub-id-type="medline">18591542</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group 
person-group-type="author"><name name-style="western"><surname>IntHout</surname><given-names>J</given-names> </name><name name-style="western"><surname>Ioannidis</surname><given-names>JPA</given-names> </name><name name-style="western"><surname>Borm</surname><given-names>GF</given-names> </name></person-group><article-title>The Hartung-Knapp-Sidik-Jonkman method for random effects meta-analysis is straightforward and considerably outperforms the standard DerSimonian-Laird method</article-title><source>BMC Med Res Methodol</source><year>2014</year><month>02</month><day>18</day><volume>14</volume><fpage>25</fpage><pub-id pub-id-type="doi">10.1186/1471-2288-14-25</pub-id><pub-id pub-id-type="medline">24548571</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Borenstein</surname><given-names>M</given-names> </name></person-group><article-title>How to understand and report heterogeneity in a meta-analysis: the difference between I-squared and prediction intervals</article-title><source>Integr Med Res</source><year>2023</year><month>12</month><volume>12</volume><issue>4</issue><fpage>101014</fpage><pub-id pub-id-type="doi">10.1016/j.imr.2023.101014</pub-id><pub-id pub-id-type="medline">38938910</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Deeks</surname><given-names>JJ</given-names> </name><name name-style="western"><surname>Macaskill</surname><given-names>P</given-names> </name><name name-style="western"><surname>Irwig</surname><given-names>L</given-names> </name></person-group><article-title>The performance of tests of publication bias and other sample size effects in systematic reviews of diagnostic test accuracy was assessed</article-title><source>J Clin 
Epidemiol</source><year>2005</year><month>09</month><volume>58</volume><issue>9</issue><fpage>882</fpage><lpage>893</lpage><pub-id pub-id-type="doi">10.1016/j.jclinepi.2005.01.016</pub-id><pub-id pub-id-type="medline">16085191</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lau</surname><given-names>J</given-names> </name><name name-style="western"><surname>Ioannidis</surname><given-names>JPA</given-names> </name><name name-style="western"><surname>Terrin</surname><given-names>N</given-names> </name><name name-style="western"><surname>Schmid</surname><given-names>CH</given-names> </name><name name-style="western"><surname>Olkin</surname><given-names>I</given-names> </name></person-group><article-title>The case of the misleading funnel plot</article-title><source>BMJ</source><year>2006</year><month>09</month><day>16</day><volume>333</volume><issue>7568</issue><fpage>597</fpage><lpage>600</lpage><pub-id pub-id-type="doi">10.1136/bmj.333.7568.597</pub-id><pub-id pub-id-type="medline">16974018</pub-id></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Heo</surname><given-names>TY</given-names> </name><name name-style="western"><surname>Kim</surname><given-names>KM</given-names> </name><name name-style="western"><surname>Min</surname><given-names>HK</given-names> </name><etal/></person-group><article-title>Development of a deep-learning-based artificial intelligence tool for differential diagnosis between dry and neovascular age-related macular degeneration</article-title><source>Diagnostics (Basel)</source><year>2020</year><month>04</month><day>28</day><volume>10</volume><issue>5</issue><fpage>261</fpage><pub-id pub-id-type="doi">10.3390/diagnostics10050261</pub-id><pub-id 
pub-id-type="medline">32354098</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Matsuba</surname><given-names>S</given-names> </name><name name-style="western"><surname>Tabuchi</surname><given-names>H</given-names> </name><name name-style="western"><surname>Ohsugi</surname><given-names>H</given-names> </name><etal/></person-group><article-title>Accuracy of ultra-wide-field fundus ophthalmoscopy-assisted deep learning, a machine-learning technology, for detecting age-related macular degeneration</article-title><source>Int Ophthalmol</source><year>2019</year><month>06</month><volume>39</volume><issue>6</issue><fpage>1269</fpage><lpage>1275</lpage><pub-id pub-id-type="doi">10.1007/s10792-018-0940-0</pub-id><pub-id pub-id-type="medline">29744763</pub-id></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Skevas</surname><given-names>C</given-names> </name><name name-style="western"><surname>Weindler</surname><given-names>H</given-names> </name><name name-style="western"><surname>Levering</surname><given-names>M</given-names> </name><name name-style="western"><surname>Engelberts</surname><given-names>J</given-names> </name><name name-style="western"><surname>van Grinsven</surname><given-names>M</given-names> </name><name name-style="western"><surname>Katz</surname><given-names>T</given-names> </name></person-group><article-title>Simultaneous screening and classification of diabetic retinopathy and age-related macular degeneration based on fundus photos-a prospective analysis of the RetCAD system</article-title><source>Int J Ophthalmol</source><year>2022</year><volume>15</volume><issue>12</issue><fpage>1985</fpage><lpage>1993</lpage><pub-id pub-id-type="doi">10.18240/ijo.2022.12.14</pub-id><pub-id 
pub-id-type="medline">36536981</pub-id></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Abdelhalim</surname><given-names>I</given-names> </name><name name-style="western"><surname>Nadmid</surname><given-names>N</given-names> </name><name name-style="western"><surname>Elsharkawy</surname><given-names>M</given-names> </name><name name-style="western"><surname>Ghazal</surname><given-names>M</given-names> </name><name name-style="western"><surname>Mahmoud</surname><given-names>AH</given-names> </name><name name-style="western"><surname>El-Baz</surname><given-names>A</given-names> </name></person-group><article-title>Mask-unmask regions (MUMR) framework for classifying AMD grades using inter-regional interaction analysis</article-title><source>IEEE Access</source><year>2025</year><volume>13</volume><fpage>8286</fpage><lpage>8296</lpage><pub-id pub-id-type="doi">10.1109/ACCESS.2025.3526948</pub-id></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Alenezi</surname><given-names>A</given-names> </name><name name-style="western"><surname>Alhamad</surname><given-names>H</given-names> </name><name name-style="western"><surname>Brindhaban</surname><given-names>A</given-names> </name><name name-style="western"><surname>Amizadeh</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Jodeiri</surname><given-names>A</given-names> </name><name name-style="western"><surname>Danishvar</surname><given-names>S</given-names> </name></person-group><article-title>Enhancing readability and detection of age-related macular degeneration using optical coherence tomography imaging: an AI approach</article-title><source>Bioengineering 
(Basel)</source><year>2024</year><month>03</month><day>22</day><volume>11</volume><issue>4</issue><fpage>300</fpage><pub-id pub-id-type="doi">10.3390/bioengineering11040300</pub-id><pub-id pub-id-type="medline">38671722</pub-id></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bao</surname><given-names>S</given-names> </name><name name-style="western"><surname>Yang</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Qu</surname><given-names>J</given-names> </name><name name-style="western"><surname>Sun</surname><given-names>J</given-names> </name></person-group><article-title>AttResAMD: an attention-driven deep learning framework for expert-level automated classification of age-related macular degeneration from fundus photography</article-title><source>Interdiscip Sci</source><year>2025</year><month>08</month><day>30</day><pub-id pub-id-type="doi">10.1007/s12539-025-00763-x</pub-id><pub-id pub-id-type="medline">40885885</pub-id></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bhatia</surname><given-names>KK</given-names> </name><name name-style="western"><surname>Graham</surname><given-names>MS</given-names> </name><name name-style="western"><surname>Terry</surname><given-names>L</given-names> </name><etal/></person-group><article-title>Disease classification of macular optical coherence tomography scans using deep learning software: validation on independent, multicenter data</article-title><source>Retina (Philadelphia, Pa)</source><year>2020</year><month>08</month><volume>40</volume><issue>8</issue><fpage>1549</fpage><lpage>1557</lpage><pub-id pub-id-type="doi">10.1097/IAE.0000000000002640</pub-id><pub-id 
pub-id-type="medline">31584557</pub-id></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Celebi</surname><given-names>ARC</given-names> </name><name name-style="western"><surname>Bulut</surname><given-names>E</given-names> </name><name name-style="western"><surname>Sezer</surname><given-names>A</given-names> </name></person-group><article-title>Artificial intelligence based detection of age-related macular degeneration using optical coherence tomography with unique image preprocessing</article-title><source>Eur J Ophthalmol</source><year>2023</year><month>01</month><volume>33</volume><issue>1</issue><fpage>65</fpage><lpage>73</lpage><pub-id pub-id-type="doi">10.1177/11206721221096294</pub-id><pub-id pub-id-type="medline">35469472</pub-id></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Chen</surname><given-names>M</given-names> </name><name name-style="western"><surname>Jin</surname><given-names>K</given-names> </name><name name-style="western"><surname>Yan</surname><given-names>Y</given-names> </name><etal/></person-group><article-title>Automated diagnosis of age-related macular degeneration using multi-modal vertical plane feature fusion via deep learning</article-title><source>Med Phys</source><year>2022</year><month>04</month><volume>49</volume><issue>4</issue><fpage>2324</fpage><lpage>2333</lpage><pub-id pub-id-type="doi">10.1002/mp.15541</pub-id><pub-id pub-id-type="medline">35172022</pub-id></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Durmaz Engin</surname><given-names>C</given-names> </name><name name-style="western"><surname>Be&#x015F;enk</surname><given-names>U</given-names> </name><name 
name-style="western"><surname>&#x00D6;zizmirliler</surname><given-names>D</given-names> </name><name name-style="western"><surname>Selver</surname><given-names>MA</given-names> </name></person-group><article-title>Comparative analysis of automated vs. expert-designed machine learning models in age-related macular degeneration detection and classification</article-title><source>Turk J Ophthalmol</source><year>2025</year><month>06</month><day>25</day><volume>55</volume><issue>3</issue><fpage>120</fpage><lpage>126</lpage><pub-id pub-id-type="doi">10.4274/tjo.galenos.2025.74780</pub-id><pub-id pub-id-type="medline">40560103</pub-id></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>El-Den</surname><given-names>NN</given-names> </name><name name-style="western"><surname>Naglah</surname><given-names>A</given-names> </name><name name-style="western"><surname>Elsharkawy</surname><given-names>M</given-names> </name><etal/></person-group><article-title>Scale-adaptive model for detection and grading of age-related macular degeneration from color retinal fundus images</article-title><source>Sci Rep</source><year>2023</year><month>06</month><day>13</day><volume>13</volume><issue>1</issue><fpage>9590</fpage><pub-id pub-id-type="doi">10.1038/s41598-023-35197-2</pub-id><pub-id pub-id-type="medline">37311794</pub-id></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Garc&#x00ED;a-Floriano</surname><given-names>A</given-names> </name><name name-style="western"><surname>Ventura-Molina</surname><given-names>E</given-names> </name></person-group><article-title>Age-related macular degeneration detection in retinal fundus images by a deep convolutional neural 
network</article-title><source>Mathematics</source><year>2024</year><volume>12</volume><issue>10</issue><fpage>1445</fpage><pub-id pub-id-type="doi">10.3390/math12101445</pub-id></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Grassmann</surname><given-names>F</given-names> </name><name name-style="western"><surname>Mengelkamp</surname><given-names>J</given-names> </name><name name-style="western"><surname>Brandl</surname><given-names>C</given-names> </name><etal/></person-group><article-title>A deep learning algorithm for prediction of Age-Related Eye Disease Study Severity Scale for age-related macular degeneration from color fundus photography</article-title><source>Ophthalmology</source><year>2018</year><month>09</month><volume>125</volume><issue>9</issue><fpage>1410</fpage><lpage>1420</lpage><pub-id pub-id-type="doi">10.1016/j.ophtha.2018.02.037</pub-id><pub-id pub-id-type="medline">29653860</pub-id></nlm-citation></ref><ref id="ref32"><label>32</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>He</surname><given-names>T</given-names> </name><name name-style="western"><surname>Zhou</surname><given-names>Q</given-names> </name><name name-style="western"><surname>Zou</surname><given-names>Y</given-names> </name></person-group><article-title>Automatic detection of age-related macular degeneration based on deep learning and local outlier factor algorithm</article-title><source>Diagnostics (Basel)</source><year>2022</year><month>02</month><day>18</day><volume>12</volume><issue>2</issue><fpage>532</fpage><pub-id pub-id-type="doi">10.3390/diagnostics12020532</pub-id><pub-id pub-id-type="medline">35204621</pub-id></nlm-citation></ref><ref id="ref33"><label>33</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Le</surname><given-names>NT</given-names> </name><name name-style="western"><surname>Le Truong</surname><given-names>T</given-names> </name><name name-style="western"><surname>Deelertpaiboon</surname><given-names>S</given-names> </name><etal/></person-group><article-title>ViT&#x2010;AMD: a new deep learning model for age&#x2010;related macular degeneration diagnosis from fundus images</article-title><source>Int J Intell Syst</source><year>2024</year><month>01</month><volume>2024</volume><issue>1</issue><pub-id pub-id-type="doi">10.1155/2024/3026500</pub-id></nlm-citation></ref><ref id="ref34"><label>34</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lee</surname><given-names>CS</given-names> </name><name name-style="western"><surname>Baughman</surname><given-names>DM</given-names> </name><name name-style="western"><surname>Lee</surname><given-names>AY</given-names> </name></person-group><article-title>Deep learning is effective for the classification of OCT images of normal versus age-related macular degeneration</article-title><source>Ophthalmol Retina</source><year>2017</year><volume>1</volume><issue>4</issue><fpage>322</fpage><lpage>327</lpage><pub-id pub-id-type="doi">10.1016/j.oret.2016.12.009</pub-id><pub-id pub-id-type="medline">30693348</pub-id></nlm-citation></ref><ref id="ref35"><label>35</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Leingang</surname><given-names>O</given-names> </name><name name-style="western"><surname>Riedl</surname><given-names>S</given-names> </name><name name-style="western"><surname>Mai</surname><given-names>J</given-names> </name><etal/></person-group><article-title>Automated deep learning-based AMD detection and staging in real-world OCT datasets (PINNACLE study report 5)</article-title><source>Sci 
Rep</source><year>2023</year><month>11</month><day>9</day><volume>13</volume><issue>1</issue><fpage>19545</fpage><pub-id pub-id-type="doi">10.1038/s41598-023-46626-7</pub-id><pub-id pub-id-type="medline">37945665</pub-id></nlm-citation></ref><ref id="ref36"><label>36</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Oliveira</surname><given-names>GC</given-names> </name><name name-style="western"><surname>Rosa</surname><given-names>GH</given-names> </name><name name-style="western"><surname>Pedronette</surname><given-names>DCG</given-names> </name><etal/></person-group><article-title>Robust deep learning for eye fundus images: bridging real and synthetic data for enhancing generalization</article-title><source>Biomed Signal Process Control</source><year>2024</year><month>08</month><volume>94</volume><fpage>106263</fpage><pub-id pub-id-type="doi">10.1016/j.bspc.2024.106263</pub-id></nlm-citation></ref><ref id="ref37"><label>37</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Tak</surname><given-names>N</given-names> </name><name name-style="western"><surname>Reddy</surname><given-names>AJ</given-names> </name><name name-style="western"><surname>Martel</surname><given-names>J</given-names> </name><name name-style="western"><surname>Martel</surname><given-names>JB</given-names> </name></person-group><article-title>Clinical wide-field retinal image deep learning classification of exudative and non-exudative age-related macular degeneration</article-title><source>Cureus</source><year>2021</year><month>08</month><volume>13</volume><issue>8</issue><fpage>e17579</fpage><pub-id pub-id-type="doi">10.7759/cureus.17579</pub-id><pub-id pub-id-type="medline">34646633</pub-id></nlm-citation></ref><ref id="ref38"><label>38</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Takhchidi</surname><given-names>K</given-names> </name><name name-style="western"><surname>Gliznitsa</surname><given-names>P</given-names> </name><name name-style="western"><surname>Svetozarskiy</surname><given-names>S</given-names> </name><name name-style="western"><surname>Bursov</surname><given-names>A</given-names> </name><name name-style="western"><surname>Shusterzon</surname><given-names>K</given-names> </name></person-group><article-title>Labelling of data on fundus color pictures used to train a deep learning model enhances its macular pathology recognition capabilities</article-title><source>BRSMU</source><year>2021</year><month>08</month><issue>4</issue><pub-id pub-id-type="doi">10.24075/brsmu.2021.040</pub-id></nlm-citation></ref><ref id="ref39"><label>39</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Tan</surname><given-names>JH</given-names> </name><name name-style="western"><surname>Bhandary</surname><given-names>SV</given-names> </name><name name-style="western"><surname>Sivaprasad</surname><given-names>S</given-names> </name><etal/></person-group><article-title>Age-related macular degeneration detection using deep convolutional neural network</article-title><source>Future Gener Comput Syst</source><year>2018</year><month>10</month><volume>87</volume><fpage>127</fpage><lpage>135</lpage><pub-id pub-id-type="doi">10.1016/j.future.2018.05.001</pub-id></nlm-citation></ref><ref id="ref40"><label>40</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Thomas</surname><given-names>A</given-names> </name><name name-style="western"><surname>Harikrishnan</surname><given-names>PM</given-names> </name><name name-style="western"><surname>Gopi</surname><given-names>VP</given-names> </name><name name-style="western"><surname>Palanisamy</surname><given-names>P</given-names> 
</name></person-group><article-title>An automated method to detect age-related macular degeneration from optical coherence tomographic images</article-title><source>Biomed Eng Appl Basis Commun</source><year>2021</year><month>10</month><volume>33</volume><issue>5</issue><pub-id pub-id-type="doi">10.4015/S1016237221500368</pub-id></nlm-citation></ref><ref id="ref41"><label>41</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wan</surname><given-names>C</given-names> </name><name name-style="western"><surname>Zhao</surname><given-names>JN</given-names> </name><name name-style="western"><surname>Hong</surname><given-names>XQ</given-names> </name><name name-style="western"><surname>Yang</surname><given-names>WH</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>SC</given-names> </name></person-group><article-title>HCSP-Net: a novel model of age-related macular degeneration classification based on color fundus photography</article-title><source>CMC</source><year>2024</year><volume>79</volume><issue>1</issue><fpage>391</fpage><lpage>407</lpage><pub-id pub-id-type="doi">10.32604/cmc.2024.048307</pub-id></nlm-citation></ref><ref id="ref42"><label>42</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wang</surname><given-names>W</given-names> </name><name name-style="western"><surname>Li</surname><given-names>X</given-names> </name><name name-style="western"><surname>Xu</surname><given-names>Z</given-names> </name><etal/></person-group><article-title>Learning two-stream CNN for multi-modal age-related macular degeneration categorization</article-title><source>IEEE J Biomed Health Inform</source><year>2022</year><month>08</month><volume>26</volume><issue>8</issue><fpage>4111</fpage><lpage>4122</lpage><pub-id pub-id-type="doi">10.1109/JBHI.2022.3171523</pub-id><pub-id 
pub-id-type="medline">35503853</pub-id></nlm-citation></ref><ref id="ref43"><label>43</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Yoo</surname><given-names>TK</given-names> </name><name name-style="western"><surname>Choi</surname><given-names>JY</given-names> </name><name name-style="western"><surname>Seo</surname><given-names>JG</given-names> </name><name name-style="western"><surname>Ramasubramanian</surname><given-names>B</given-names> </name><name name-style="western"><surname>Selvaperumal</surname><given-names>S</given-names> </name><name name-style="western"><surname>Kim</surname><given-names>DW</given-names> </name></person-group><article-title>The possibility of the combination of OCT and fundus images for improving the diagnostic accuracy of deep learning for age-related macular degeneration: a preliminary experiment</article-title><source>Med Biol Eng Comput</source><year>2019</year><month>03</month><volume>57</volume><issue>3</issue><fpage>677</fpage><lpage>687</lpage><pub-id pub-id-type="doi">10.1007/s11517-018-1915-z</pub-id><pub-id pub-id-type="medline">30349958</pub-id></nlm-citation></ref><ref id="ref44"><label>44</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Yusufo&#x011F;lu</surname><given-names>E</given-names> </name><name name-style="western"><surname>F&#x0131;rat</surname><given-names>H</given-names> </name><name name-style="western"><surname>&#x00DC;zen</surname><given-names>H</given-names> </name><etal/></person-group><article-title>A comprehensive CNN model for age-related macular degeneration classification using OCT: integrating inception modules, SE blocks, and ConvMixer</article-title><source>Diagnostics (Basel)</source><year>2024</year><month>12</month><day>17</day><volume>14</volume><issue>24</issue><fpage>2836</fpage><pub-id 
pub-id-type="doi">10.3390/diagnostics14242836</pub-id><pub-id pub-id-type="medline">39767197</pub-id></nlm-citation></ref><ref id="ref45"><label>45</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zapata</surname><given-names>MA</given-names> </name><name name-style="western"><surname>Royo-Fibla</surname><given-names>D</given-names> </name><name name-style="western"><surname>Font</surname><given-names>O</given-names> </name><etal/></person-group><article-title>Artificial intelligence to identify retinal fundus images, quality validation, laterality evaluation, macular degeneration, and suspected glaucoma</article-title><source>Clin Ophthalmol</source><year>2020</year><volume>14</volume><fpage>419</fpage><lpage>429</lpage><pub-id pub-id-type="doi">10.2147/OPTH.S235751</pub-id><pub-id pub-id-type="medline">32103888</pub-id></nlm-citation></ref><ref id="ref46"><label>46</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zhen</surname><given-names>B</given-names> </name><name name-style="western"><surname>Qi</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Tang</surname><given-names>Z</given-names> </name><etal/></person-group><article-title>Low-rank fine-tuning meets cross-modal analysis: a robust framework for age-related macular degeneration categorization</article-title><source>J Imaging Inform Med</source><year>2025</year><volume>39</volume><issue>1</issue><fpage>714</fpage><lpage>731</lpage><pub-id pub-id-type="doi">10.1007/s10278-025-01513-7</pub-id></nlm-citation></ref><ref id="ref47"><label>47</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Rosenblatt</surname><given-names>M</given-names> </name><name name-style="western"><surname>Tejavibulya</surname><given-names>L</given-names> 
</name><name name-style="western"><surname>Jiang</surname><given-names>R</given-names> </name><name name-style="western"><surname>Noble</surname><given-names>S</given-names> </name><name name-style="western"><surname>Scheinost</surname><given-names>D</given-names> </name></person-group><article-title>Data leakage inflates prediction performance in connectome-based machine learning models</article-title><source>Nat Commun</source><year>2024</year><month>02</month><day>28</day><volume>15</volume><issue>1</issue><fpage>1829</fpage><pub-id pub-id-type="doi">10.1038/s41467-024-46150-w</pub-id><pub-id pub-id-type="medline">38418819</pub-id></nlm-citation></ref><ref id="ref48"><label>48</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Varoquaux</surname><given-names>G</given-names> </name><name name-style="western"><surname>Cheplygina</surname><given-names>V</given-names> </name></person-group><article-title>Machine learning for medical imaging: methodological failures and recommendations for the future</article-title><source>NPJ Digit Med</source><year>2022</year><month>04</month><day>12</day><volume>5</volume><issue>1</issue><fpage>48</fpage><pub-id pub-id-type="doi">10.1038/s41746-022-00592-y</pub-id><pub-id pub-id-type="medline">35413988</pub-id></nlm-citation></ref><ref id="ref49"><label>49</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Yim</surname><given-names>J</given-names> </name><name name-style="western"><surname>Chopra</surname><given-names>R</given-names> </name><name name-style="western"><surname>Spitz</surname><given-names>T</given-names> </name><etal/></person-group><article-title>Predicting conversion to wet age-related macular degeneration using deep learning</article-title><source>Nat Med</source><year>2020</year><month>06</month><volume>26</volume><issue>6</issue><fpage>892</fpage><lpage>899</lpage><pub-id 
pub-id-type="doi">10.1038/s41591-020-0867-7</pub-id></nlm-citation></ref><ref id="ref50"><label>50</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Miladinovi&#x0107;</surname><given-names>A</given-names> </name><name name-style="western"><surname>Biscontin</surname><given-names>A</given-names> </name><name name-style="western"><surname>Aj&#x010D;evi&#x0107;</surname><given-names>M</given-names> </name><etal/></person-group><article-title>Evaluating deep learning models for classifying OCT images with limited data and noisy labels</article-title><source>Sci Rep</source><year>2024</year><month>12</month><day>5</day><volume>14</volume><issue>1</issue><fpage>30321</fpage><pub-id pub-id-type="doi">10.1038/s41598-024-81127-1</pub-id><pub-id pub-id-type="medline">39638854</pub-id></nlm-citation></ref><ref id="ref51"><label>51</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Li</surname><given-names>Y</given-names> </name><name name-style="western"><surname>El Habib Daho</surname><given-names>M</given-names> </name><name name-style="western"><surname>Conze</surname><given-names>PH</given-names> </name><etal/></person-group><article-title>A review of deep learning-based information fusion techniques for multimodal medical image classification</article-title><source>Comput Biol Med</source><year>2024</year><month>07</month><volume>177</volume><fpage>108635</fpage><pub-id pub-id-type="doi">10.1016/j.compbiomed.2024.108635</pub-id></nlm-citation></ref><ref id="ref52"><label>52</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sosna</surname><given-names>J</given-names> </name><name name-style="western"><surname>Joskowicz</surname><given-names>L</given-names> </name><name 
name-style="western"><surname>Saban</surname><given-names>M</given-names> </name></person-group><article-title>Navigating the AI landscape in medical imaging: a critical analysis of technologies, implementation, and implications</article-title><source>Radiology</source><year>2025</year><month>06</month><volume>315</volume><issue>3</issue><fpage>e240982</fpage><pub-id pub-id-type="doi">10.1148/radiol.240982</pub-id><pub-id pub-id-type="medline">40552997</pub-id></nlm-citation></ref><ref id="ref53"><label>53</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Rashidisabet</surname><given-names>H</given-names> </name><name name-style="western"><surname>Sethi</surname><given-names>A</given-names> </name><name name-style="western"><surname>Jindarak</surname><given-names>P</given-names> </name><etal/></person-group><article-title>Validating the generalizability of ophthalmic artificial intelligence models on real-world clinical data</article-title><source>Transl Vis Sci Technol</source><year>2023</year><month>11</month><day>1</day><volume>12</volume><issue>11</issue><fpage>8</fpage><pub-id pub-id-type="doi">10.1167/tvst.12.11.8</pub-id><pub-id pub-id-type="medline">37922149</pub-id></nlm-citation></ref><ref id="ref54"><label>54</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Finlayson</surname><given-names>SG</given-names> </name><name name-style="western"><surname>Subbaswamy</surname><given-names>A</given-names> </name><name name-style="western"><surname>Singh</surname><given-names>K</given-names> </name><etal/></person-group><article-title>The clinician and dataset shift in artificial intelligence</article-title><source>N Engl J Med</source><year>2021</year><month>07</month><day>15</day><volume>385</volume><issue>3</issue><fpage>283</fpage><lpage>286</lpage><pub-id pub-id-type="doi">10.1056/NEJMc2104626</pub-id><pub-id 
pub-id-type="medline">34260843</pub-id></nlm-citation></ref><ref id="ref55"><label>55</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Choi</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Yu</surname><given-names>W</given-names> </name><name name-style="western"><surname>Nagarajan</surname><given-names>MB</given-names> </name><etal/></person-group><article-title>Translating AI to clinical practice: overcoming data shift with explainability</article-title><source>Radiographics</source><year>2023</year><month>05</month><volume>43</volume><issue>5</issue><fpage>e220105</fpage><pub-id pub-id-type="doi">10.1148/rg.220105</pub-id><pub-id pub-id-type="medline">37104124</pub-id></nlm-citation></ref><ref id="ref56"><label>56</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Collins</surname><given-names>GS</given-names> </name><name name-style="western"><surname>Moons</surname><given-names>KGM</given-names> </name><name name-style="western"><surname>Dhiman</surname><given-names>P</given-names> </name><etal/></person-group><article-title>TRIPOD+AI statement: updated guidance for reporting clinical prediction models that use regression or machine learning methods</article-title><source>BMJ</source><year>2024</year><month>04</month><day>16</day><volume>385</volume><fpage>e078378</fpage><pub-id pub-id-type="doi">10.1136/bmj-2023-078378</pub-id><pub-id pub-id-type="medline">38626948</pub-id></nlm-citation></ref><ref id="ref57"><label>57</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Rao</surname><given-names>VM</given-names> </name><name name-style="western"><surname>Hla</surname><given-names>M</given-names> </name><name name-style="western"><surname>Moor</surname><given-names>M</given-names> 
</name><etal/></person-group><article-title>Multimodal generative AI for medical image interpretation</article-title><source>Nature</source><year>2025</year><month>03</month><volume>639</volume><issue>8056</issue><fpage>888</fpage><lpage>896</lpage><pub-id pub-id-type="doi">10.1038/s41586-025-08675-y</pub-id><pub-id pub-id-type="medline">40140592</pub-id></nlm-citation></ref><ref id="ref58"><label>58</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Champendal</surname><given-names>M</given-names> </name><name name-style="western"><surname>M&#x00FC;ller</surname><given-names>H</given-names> </name><name name-style="western"><surname>Prior</surname><given-names>JO</given-names> </name><name name-style="western"><surname>Dos Reis</surname><given-names>CS</given-names> </name></person-group><article-title>A scoping review of interpretability and explainability concerning artificial intelligence methods in medical imaging</article-title><source>Eur J Radiol</source><year>2023</year><month>12</month><volume>169</volume><fpage>111159</fpage><pub-id pub-id-type="doi">10.1016/j.ejrad.2023.111159</pub-id><pub-id pub-id-type="medline">37976760</pub-id></nlm-citation></ref><ref id="ref59"><label>59</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Vasey</surname><given-names>B</given-names> </name><name name-style="western"><surname>Nagendran</surname><given-names>M</given-names> </name><name name-style="western"><surname>Campbell</surname><given-names>B</given-names> </name><etal/></person-group><article-title>Reporting guideline for the early stage clinical evaluation of decision support systems driven by artificial intelligence: DECIDE-AI</article-title><source>BMJ</source><year>2022</year><month>05</month><day>18</day><volume>377</volume><fpage>e070904</fpage><pub-id 
pub-id-type="doi">10.1136/bmj-2022-070904</pub-id><pub-id pub-id-type="medline">35584845</pub-id></nlm-citation></ref><ref id="ref60"><label>60</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sandhu</surname><given-names>S</given-names> </name><name name-style="western"><surname>Lin</surname><given-names>AL</given-names> </name><name name-style="western"><surname>Brajer</surname><given-names>N</given-names> </name><etal/></person-group><article-title>Integrating a machine learning system into clinical workflows: qualitative study</article-title><source>J Med Internet Res</source><year>2020</year><month>11</month><day>19</day><volume>22</volume><issue>11</issue><fpage>e22421</fpage><pub-id pub-id-type="doi">10.2196/22421</pub-id><pub-id pub-id-type="medline">33211015</pub-id></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>Detailed methodology, search strategies, quality assessments, subgroup analyses, and sensitivity analyses.</p><media xlink:href="jmir_v28i1e97174_app1.pdf" xlink:title="PDF File, 10176 KB"/></supplementary-material><supplementary-material id="app2"><label>Checklist 1</label><p>PRISMA checklist.</p><media xlink:href="jmir_v28i1e97174_app2.pdf" xlink:title="PDF File, 220 KB"/></supplementary-material></app-group></back></article>