<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id><journal-id journal-id-type="publisher-id">jmir</journal-id><journal-id journal-id-type="index">1</journal-id><journal-title>Journal of Medical Internet Research</journal-title><abbrev-journal-title>J Med Internet Res</abbrev-journal-title><issn pub-type="epub">1438-8871</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v28i1e88766</article-id><article-id pub-id-type="doi">10.2196/88766</article-id><article-categories><subj-group subj-group-type="heading"><subject>Original Paper</subject></subj-group></article-categories><title-group><article-title>Performance of AI Tools in Citing Retracted Literature: Content Analysis</article-title></title-group><contrib-group><contrib contrib-type="author" equal-contrib="yes"><name name-style="western"><surname>Labenbacher</surname><given-names>Sebastian</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff1"/><xref ref-type="fn" rid="equal-contrib1">*</xref></contrib><contrib contrib-type="author" equal-contrib="yes"><name name-style="western"><surname>Niederer</surname><given-names>Maximilian</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff1"/><xref ref-type="fn" rid="equal-contrib1">*</xref></contrib><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Hammer</surname><given-names>Sascha</given-names></name><degrees>MBA, MD</degrees><xref ref-type="aff" rid="aff1"/></contrib><contrib contrib-type="author"><name 
name-style="western"><surname>Bader</surname><given-names>Matthias</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff1"/></contrib><contrib contrib-type="author"><name name-style="western"><surname>Schreiber</surname><given-names>Nikolaus</given-names></name><degrees>MD, PHD</degrees><xref ref-type="aff" rid="aff1"/></contrib><contrib contrib-type="author"><name name-style="western"><surname>Bornemann-Cimenti</surname><given-names>Helmar</given-names></name><degrees>MSc, MBA, MD</degrees><xref ref-type="aff" rid="aff1"/></contrib></contrib-group><aff id="aff1"><institution>Department of Anesthesiology and Intensive Care Medicine, Medical University of Graz</institution><addr-line>Auenbruggerplatz 5</addr-line><addr-line>Graz</addr-line><country>Austria</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Coristine</surname><given-names>Andrew</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Patel</surname><given-names>Dip Bharatbhai</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Marshall</surname><given-names>Robert</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Mirji</surname><given-names>Shashank</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Singireddy</surname><given-names>Srikanth</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Foltynek</surname><given-names>Tomas</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Sascha Hammer, MBA, MD, Department of Anesthesiology and Intensive Care Medicine, Medical University of Graz, Auenbruggerplatz 5, Graz, 8036, Austria, 43 316-385-81843; <email>sascha.hammer@medunigraz.at</email></corresp><fn fn-type="equal" 
id="equal-contrib1"><label>*</label><p>these authors contributed equally</p></fn></author-notes><pub-date pub-type="collection"><year>2026</year></pub-date><pub-date pub-type="epub"><day>1</day><month>5</month><year>2026</year></pub-date><volume>28</volume><elocation-id>e88766</elocation-id><history><date date-type="received"><day>01</day><month>12</month><year>2025</year></date><date date-type="rev-recd"><day>24</day><month>03</month><year>2026</year></date><date date-type="accepted"><day>28</day><month>03</month><year>2026</year></date></history><copyright-statement>&#x00A9; Sebastian Labenbacher, Maximilian Niederer, Sascha Hammer, Matthias Bader, Nikolaus Schreiber, Helmar Bornemann-Cimenti. Originally published in the Journal of Medical Internet Research (<ext-link ext-link-type="uri" xlink:href="https://www.jmir.org">https://www.jmir.org</ext-link>), 1.5.2026. </copyright-statement><copyright-year>2026</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research (ISSN 1438-8871), is properly cited. 
The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://www.jmir.org/">https://www.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://www.jmir.org/2026/1/e88766"/><abstract><sec><title>Background</title><p>Generative artificial intelligence (GenAI) tools are increasingly used in scientific research to support literature searches, evidence synthesis, and manuscript preparation. While these systems promise substantial efficiency gains, concerns have emerged regarding their reliability, particularly their tendency to cite inaccurate, fabricated, or retracted literature. The unrecognized inclusion of retracted studies poses a serious risk to research integrity and evidence-based decision-making. Whether commonly used GenAI tools can reliably detect, exclude, or transparently communicate the retraction status of scientific publications remains unclear.</p></sec><sec><title>Objective</title><p>This study aimed to evaluate the ability of freely available GenAI tools to correctly handle retracted scientific articles during literature searches. Primary and secondary outcomes focused on accuracy, reliability, and consistency in recognizing retracted literature.</p></sec><sec sec-type="methods"><title>Methods</title><p>In this pragmatic trial, nine widely used free-access GenAI tools (ChatGPT 4, ChatGPT 5, Claude, Gemini, Perplexity, Microsoft Copilot, SciSpace, ScienceOS, and Consensus) were evaluated. Each tool was asked five predefined, standardized questions addressing topic overview, article identification, article summarization, and explicit assessment of retraction status. Overall, 15 retracted articles (the 10 most cited and 5 most recently retracted as of May 23, 2025) were selected from the Retraction Watch database. All questions were repeated twice to assess intratool consistency. 
Responses were independently rated as correct or incorrect by 2 researchers. Descriptive statistics summarized performance, and comparisons between general-purpose and research-focused AI tools were conducted using descriptive statistics. Interreviewer agreement was assessed using Cohen kappa coefficient.</p></sec><sec sec-type="results"><title>Results</title><p>None of the evaluated AI tools consistently handled retracted articles correctly. No model achieved perfect accuracy across all question sets. ChatGPT 5 performed best, defined by the primary outcome of achieving fully correct responses to all five predefined tasks (5/5) for the highest number of retracted articles, correctly answering all five questions for 8 of 15 articles (53.3%). Research-focused tools (SciSpace, ScienceOS, and Consensus) failed to produce a single fully correct response set. Retracted articles were frequently included in topic overviews without warning, with error rates exceeding 40% in several tools. When specifically asked about retraction status, most systems failed to provide correct or complete information. OpenEvidence only reported data for a subset of our retracted articles as it is only used in health care literature. It demonstrated strong performance in topic overviews but low accuracy in identifying retracted articles.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>Freely available GenAI tools are currently not able to detect, exclude, or appropriately flag retracted scientific literature. The widespread and confident reproduction of retracted studies represents a substantial threat to research integrity, particularly in medical and evidence-based fields. 
Until retraction-aware verification mechanisms are systematically integrated, independent source checking remains essential when using AI-assisted literature tools.</p></sec></abstract><kwd-group><kwd>artificial intelligence</kwd><kwd>AI</kwd><kwd>retraction of publication</kwd><kwd>retractions</kwd><kwd>scientific misconduct</kwd><kwd>evidence-based Practice</kwd><kwd>data accuracy</kwd><kwd>ethics</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>Artificial intelligence (AI) has gained substantial importance in scientific research in recent years, with growing use in text generation and automated knowledge retrieval [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref2">2</xref>]. Especially in health sciences, AI-based applications are being integrated into research workflows, and early evaluations suggest that these technologies can accelerate literature reviews, facilitate the synthesis of large datasets, and assist researchers and students in scientific communication [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref4">4</xref>]. An example of a generative artificial intelligence (GenAI) system developed for medical information gathering is OpenEvidence, which can exclusively be used by health care professionals. In particular, the ability of AI tools to efficiently process large volumes of information has been highlighted as a promising complement to established methods of evidence appraisal and synthesis [<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref6">6</xref>].</p><p>At the same time, important limitations and risks of these technologies have become increasingly evident [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref8">8</xref>]. 
AI-assisted text generation may produce erroneous or fabricated references, thereby raising concerns about the integrity of GenAI-supported scientific work [<xref ref-type="bibr" rid="ref9">9</xref>]. Beyond the creation of non-existent citations, the inadvertent inclusion of retracted publications represents a critical issue that can seriously undermine the reliability of evidence syntheses [<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref11">11</xref>]. Recent reports have demonstrated that large language models (LLMs) frequently cite retracted or unreliable studies without acknowledgment, often presenting them with high linguistic confidence and apparent authority [<xref ref-type="bibr" rid="ref12">12</xref>]. This behavior poses a direct threat to research integrity and may ultimately affect patient safety, particularly in evidence-based disciplines such as medicine, where retracted data can distort clinical conclusions.</p><p>Despite the rapid proliferation of AI tools for academic use, it remains unclear whether these systems can reliably detect, flag, or communicate the retraction status of scientific articles. Earlier investigations have focused primarily on single models or subject areas, leaving the broader cross-platform performance of GenAI largely unexplored [<xref ref-type="bibr" rid="ref13">13</xref>].</p><p>Even AI tools marketed for scientific and health care research&#x2014;such as those designed to assist literature searches or summarize findings&#x2014;have not been systematically tested for their ability to recognize retracted work. Against this background, this study evaluated nine widely used GenAI systems, including ChatGPT 5, ChatGPT 4, Claude, Gemini, Perplexity, Microsoft Copilot, SciSpace, ScienceOS, and Consensus, with OpenEvidence analyzed separately as a medical literature&#x2013;specific model. 
Using a predefined set of retracted clinical trials, each tool was assessed for factual accuracy, reliability, and risk of propagating retracted studies. The aim was to critically assess the reliability of AI-assisted literature searches and, based on these findings, to provide recommendations for their responsible and safe application in scientific practice.</p></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Study Design</title><p>This study was designed as a pragmatic evaluation using freely accessible artificial intelligence (AI) tools for literature screening and summarization of articles with five predefined questions to assess how retracted articles are handled. The trial was registered during the data acquisition phase in the Open Science Framework (osf.io) [<xref ref-type="bibr" rid="ref14">14</xref>].</p><p>The predefined questions were designed to outline a pathway for investigating a new topic of interest: providing an overview based on the article&#x2019;s keywords, identifying the most important research articles related to the defined topic, and summarizing the retracted article. The final two questions were directly aimed at the retraction status of the respective article. Each response was scored as correct (1) or incorrect (0).</p><p>We used AI tools specifically designed for scientific work (Consensus Version July 2025, SciSpace Version 1.5.1, ScienceOS Version July 2025, OpenEvidence Version July 2025) as well as generic AI tools (ChatGPT Version 4 and 5, Microsoft Copilot Version 3.0, Gemini Version 2.5, Perplexity AI Version July 2025, Claude AI Version 3.5). AI tools with trial versions insufficient to perform all necessary research questions were excluded.</p></sec><sec id="s2-2"><title>Ethical Considerations</title><p>This study did not involve human participants, patient data, or identifiable personal information and was limited to the analysis of publicly available literature and AI-generated outputs. 
In accordance with institutional and international research ethics guidelines, ethics committee approval was not required.</p></sec><sec id="s2-3"><title>Questions</title><p>The following questions were used to guide the literature assessment:</p><list list-type="order"><list-item><p>Give me an overview on publications of &#x201C;keywords of the article&#x201D;</p></list-item><list-item><p>Give relevant articles regarding the topic of &#x201C;keywords of the article&#x201D;</p></list-item><list-item><p>Summarise the article &#x201C;TITLE&#x201D; published by &#x201C;AUTHORS&#x201D; for me</p></list-item><list-item><p>Is the article &#x201C;TITLE&#x201D; published by &#x201C;AUTHORS&#x201D; retracted?</p></list-item><list-item><p>Why is &#x201C;TITLE&#x201D; published by &#x201C;AUTHORS&#x201D; retracted?</p></list-item></list><p>The exact titles (excluding RETRACTED or any other details on the retraction status), authors, and keywords we used can be viewed in the supplement (Table S1 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>, [<xref ref-type="bibr" rid="ref15">15</xref>-<xref ref-type="bibr" rid="ref29">29</xref>]).</p></sec><sec id="s2-4"><title>Scoring and Rating of Responses</title><p>A binary scoring system (one point per question) was used to emphasize functional correctness in AI-assisted literature workflows, acknowledging that this approach sacrifices linguistic nuance. Responses using equivocal or nonspecific wording (eg, &#x201C;controversial&#x201D; or &#x201C;heavily criticized&#x201D; instead of explicitly stating retraction) were rated as incorrect, as such phrasing does not clearly state retraction status. 
This conservative approach was chosen to avoid overestimating AI performance where precise identification of retracted literature is critical.</p></sec><sec id="s2-5"><title>Retracted Articles</title><sec id="s2-5-1"><title>Overview</title><p>To ensure objectivity, we used the ten most cited, retracted articles found in the Retraction Watch website [<xref ref-type="bibr" rid="ref30">30</xref>]. Furthermore, we used 5 articles, retracted closest to May 23, 2025 (extracted from the retraction-watch database [<xref ref-type="bibr" rid="ref31">31</xref>]) to compare whether the number of citations or the date of retraction has an impact on our results [<xref ref-type="bibr" rid="ref15">15</xref>-<xref ref-type="bibr" rid="ref29">29</xref>]. Details of the included articles can be found in Table S1 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> [<xref ref-type="bibr" rid="ref15">15</xref>-<xref ref-type="bibr" rid="ref29">29</xref>].</p><p>In the process of choosing the papers for our search, we did not find keywords for every article. Keywords were essential to our methods, and we therefore developed our own keywords for the articles. To ensure a uniform keyword generation process, we decided to let ChatGPT 4 choose the respective keywords for the articles. As ChatGPT is also a tool in evaluation, we performed an exploratory analysis of keyword generation restricted to general-purpose GenAI tools (ChatGPT 4, Claude, Gemini, and Perplexity), as these models were used for keyword generation in our study design. Research-focused AI tools were not included in this analysis.</p></sec><sec id="s2-5-2"><title>Study Aims</title><p>Our primary aim was to assess how often AI tools correctly handled retracted articles. 
Correct handling was defined as providing accurate responses to all 5 predefined questions, including appropriate identification and summarization of the article and correct recognition and explanation of its retraction status.</p></sec><sec id="s2-5-3"><title>Secondary Aims</title><p>The secondary aims of this study were as follows:</p><list list-type="order"><list-item><p>How often were retracted articles included in a topic overview (topic overviews include questions 1 and 2)</p></list-item><list-item><p>How often were the retracted articles included in the search pathway (search pathway includes questions 1, 2, and 3)</p></list-item><list-item><p>To analyze how often results differ when the same question is asked twice</p></list-item><list-item><p>To compare general AI tools (like ChatGPT) against AI tools specifically designed for research (like ScienceOS or Consensus)</p></list-item></list></sec><sec id="s2-5-4"><title>Exploratory Aims</title><p>The exploratory aims mentioned here were not predefined but were added later to further improve and clarify the aforementioned aims.</p><sec id="s2-5-4-1"><title>Incidence of GenAI Hallucinations</title><p>To explore the incidence of GenAI hallucinations, we screened question 1 and question 2 of each AI tool. Hallucinations were defined as the generation of non-existent or unverifiable publications; interpretive inaccuracies regarding article content or retraction reasons were not included in this definition. This screening was limited to checking the existence of each link and, when possible, comparing the trial details of each article described to the trial details of the corresponding link. Cases in which we found errors (like an incorrect first author or incorrect journal) were recorded but not judged as hallucinations since the respective article exists.</p></sec><sec id="s2-5-4-2"><title>Nonretracted Control Group</title><p>We retrospectively added a control group of nonretracted articles. 
This was deemed necessary to give an estimate on the ability of AI tools to find articles when provided with a list of keywords. Keywords were generated in the same fashion as described in the methods above; we only asked question 1 and question 2.</p></sec></sec></sec><sec id="s2-6"><title>Analysis</title><sec id="s2-6-1"><title>Data Extraction</title><p>Data were gathered in a predefined Microsoft Excel file with full-text answers and ratings. This process was performed by two independent researchers (MN and SL). After completion of the data extraction, results were compared between authors and, in case of uncertainty, discussed in the author group. After each article (with the 5 respective questions), a new chat session was opened.</p></sec><sec id="s2-6-2"><title>Statistical Analysis</title><p>Descriptive statistics were used to summarize the performance of each AI tool across all retracted articles and questions. For each tool, the proportion of correct answers was calculated separately for each of the five predefined questions as well as across all questions combined. Results were reported as absolute counts and percentages.</p><p>To address the primary outcome, the number of retracted articles for which an AI tool provided correct answers to all five questions was determined. 
This was expressed as frequency and percentage of the total.</p><p>For secondary outcomes, error rates were calculated as follows:</p><list list-type="order"><list-item><p>Inclusion of retracted articles in topic overviews (Questions 1 and 2): Proportion of instances where a retracted article was mentioned without retraction warning.</p></list-item><list-item><p>Missed retraction status in search pathway (Questions 1&#x2013;3): Proportion of instances in which a retracted article was included without an explicit warning of its retraction status.</p></list-item><list-item><p>Intratool consistency: Agreement between duplicate question-answer pairs was assessed by calculating the percentage of conflicting ratings between reviewers for each AI tool.</p></list-item><list-item><p>Comparison between general-purpose AI tools and research-focused AI tools: Median number of correct answers per question was compared between the two groups.</p></list-item></list><p>Comparisons between general-purpose and research-focused AI tools were treated as exploratory. Differences between groups were summarized using descriptive statistics and effect size estimates, without formal hypothesis testing, due to the limited number of tools per group.</p><p>Interreviewer agreement during data extraction was assessed using Cohen kappa coefficient (&#x03BA;), with percentage agreement reported as a complementary measure.</p><p>All analyses were conducted using R (version 4.3.2; R Foundation for Statistical Computing) and Microsoft Excel 2023 for data entry and descriptive summaries.</p></sec></sec></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title>Overview</title><p>Data extraction was performed between June and September 2025. A total of 675 prompts were submitted, each asked twice across all models.</p><p>The primary aim of this analysis was to assess how often each GenAI tool achieved a perfect score of 5/5 correct answers per question set. 
None of the evaluated models consistently achieved full accuracy. Three tools - SciSpace, ScienceOS, and Consensus - failed to provide any completely correct responses. The best-performing model was ChatGPT 5, which achieved 8 out of 15 perfect results (53.3%). Model-specific performance data can be seen in <xref ref-type="fig" rid="figure1">Figure 1</xref>.</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>Performance of nine freely available generative artificial intelligence (AI) tools in a pragmatic cross-sectional evaluation of their ability to correctly handle retracted scientific publications. Shown is the proportion of retracted articles (n=15) for which each artificial intelligence tool provided fully correct responses.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="jmir_v28i1e88766_fig01.png"/></fig></sec><sec id="s3-2"><title>Retracted Articles in Topic Overviews and Search Pathway</title><p>To assess the risk of incorporating retracted studies into literature syntheses, topic overviews and combined topic overviews with article summaries were analyzed separately.</p><p>When examining only topic overviews (ie, the first two questions), Microsoft Copilot gave an incorrect answer in 2 (13.3%) out of 15 cases. SciSpace, on the other hand, had the most incorrect cases with 8 out of 15 cases. 
Exact model performances are presented in the supplementary file figure S1 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> and <xref ref-type="table" rid="table1">Table 1</xref>.</p><p>To assess the risk of incorporating retracted studies into literature syntheses, topic overviews, and combined topic overviews with article summaries were analyzed separately.</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Primary analysis of nine freely available generative artificial intelligence tools in a pragmatic cross-sectional study evaluating the handling of retracted scientific literature. For each tool, the table reports the number and percentage of retracted articles (n=15) with fully correct responses to all five predefined questions, the frequency of unflagged inclusion of retracted articles in topic overviews, and intratool reliability based on discrepancies between repeated responses.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">GenAI<sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup></td><td align="left" valign="bottom">All 5 correct, n (%)</td><td align="left" valign="bottom">Retracted article in topic overviews, n (%)</td><td align="left" valign="bottom">Reliability, n (%)</td></tr></thead><tbody><tr><td align="left" valign="top">ChatGPT 4</td><td align="left" valign="top">6 (40)</td><td align="left" valign="top">3 (20)</td><td align="left" valign="top">2 (2.67)</td></tr><tr><td align="left" valign="top">ChatGPT 5</td><td align="left" valign="top">8 (53.34)</td><td align="left" valign="top">3 (20)</td><td align="left" valign="top">5 (6.67)</td></tr><tr><td align="left" valign="top">Microsoft Copilot</td><td align="left" valign="top">4 (26.67)</td><td align="left" valign="top">2 (13.34)</td><td align="left" valign="top">8 (10.67)</td></tr><tr><td align="left" valign="top">Gemini</td><td align="left" valign="top">6 (40)</td><td 
align="left" valign="top">3 (20)</td><td align="left" valign="top">12 (16)</td></tr><tr><td align="left" valign="top">SciSpace</td><td align="left" valign="top">0 (0)</td><td align="left" valign="top">8 (53.34)</td><td align="left" valign="top">7 (9.33)</td></tr><tr><td align="left" valign="top">ScienceOS</td><td align="left" valign="top">0 (0)</td><td align="left" valign="top">6 (40)</td><td align="left" valign="top">8 (10.67)</td></tr><tr><td align="left" valign="top">Consensus</td><td align="left" valign="top">0 (0)</td><td align="left" valign="top">6 (40)</td><td align="left" valign="top">19 (25.33)</td></tr><tr><td align="left" valign="top">Claude</td><td align="left" valign="top">7 (46.67)</td><td align="left" valign="top">6 (40)</td><td align="left" valign="top">12 (16)</td></tr><tr><td align="left" valign="top">Perplexity</td><td align="left" valign="top">5 (33.34)</td><td align="left" valign="top">6 (40)</td><td align="left" valign="top">17 (22.67)</td></tr></tbody></table><table-wrap-foot><fn id="table1fn1"><p><sup>a</sup>GenAI: generative artificial intelligence.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s3-3"><title>Reliability of Tools</title><p>Each GenAI tool was asked every question twice to assess consistency. Differences between repeated answers resulting in a rating change occurred in each GenAI tool. ChatGPT 4 had the lowest variability with only two inconsistent answers (2.67%). Consensus had the highest incidence with 19 differences, approximately 1 in every 4 questions. Detailed results for each AI are shown in <xref ref-type="table" rid="table1">Table 1</xref>. 
We additionally calculated Cohen kappa, which ranged from 0.43 in Perplexity to 0.92 in ChatGPT, with an overall Cohen kappa of 0.73 (Table S3 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>).</p></sec><sec id="s3-4"><title>Comparison of General Versus Scientific GenAI Tools</title><p>General-purpose AI tools achieved a median of 6 fully correct responses out of 15 (range 4&#x2010;8), corresponding to an aggregated accuracy of 40%. In contrast, research-focused tools did not achieve any fully correct responses (median 0, range 0&#x2010;0). The absolute difference in aggregated proportions between groups was 40 percentage points. Given the limited number of tools per group, this comparison is descriptive and exploratory.</p></sec><sec id="s3-5"><title>Comparison of Keywords</title><p>Keyword overlap was assessed using a moderately generous matching strategy designed to account for realistic linguistic variation while maintaining methodological rigor. Specifically, orthographic variants (eg, &#x201C;COVID-19&#x201D; vs &#x201C;covid19&#x201D;), singular and plural forms (eg, &#x201C;risk factor&#x201D; vs &#x201C;risk factors&#x201D;), hyphenation differences (eg, &#x201C;meta-analysis&#x201D; vs &#x201C;meta analysis&#x201D;), and established abbreviations versus full terms (eg, &#x201C;RCT&#x201D; vs &#x201C;randomized controlled trial,&#x201D; &#x201C;AI&#x201D; vs &#x201C;artificial intelligence&#x201D;) were treated as equivalent. In addition, clearly synonymous or lexically overlapping technical terms referring to the same scientific entity or method (eg, &#x201C;Alzheimer&#x2019;s disease&#x201D; vs &#x201C;Alzheimer pathology,&#x201D; or &#x201C;clinical trials&#x201D; vs &#x201C;clinical trial&#x201D;) were considered matches. 
In contrast, broader conceptual or hierarchical relationships were explicitly excluded from matching; for example, general versus specific terms (eg, &#x201C;mental health&#x201D; vs &#x201C;depression&#x201D;), different abstraction levels (eg, &#x201C;hypertension&#x201D; vs &#x201C;blood pressure control&#x201D;), or loosely related thematic concepts (eg, &#x201C;public health&#x201D; vs &#x201C;disease prevention&#x201D;) were not treated as equivalent. Pairwise comparisons showed substantial keyword overlap across all model pairs, ranging from 68% to 78%. The greatest overlap was observed between Claude and Perplexity (78%), followed by ChatGPT-4 and Perplexity (74%). ChatGPT-4 and Claude, and Gemini and Claude, each demonstrated 72% overlap, while Gemini and Perplexity showed 70% overlap and ChatGPT-4 and Gemini showed the lowest overlap at 68%. Comparable overlap magnitudes are observed both in comparisons involving the reference model and in comparisons among nonreference models, indicating a high degree of thematic convergence across LLMs. This matching strategy was selected to balance linguistic flexibility with analytical conservatism, avoiding both overly strict string-based matching and overly permissive conceptual aggregation. Nonetheless, we want to raise awareness that ChatGPT may have had a benefit as the keywords used for our search were generated with ChatGPT 4 and no perfect overlap was shown between AI tools.</p></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Principal Findings and Interpretation</title><p>In this cross-sectional evaluation of 9 freely available GenAI tools, none were able to consistently recognize or correctly handle retracted scientific publications. 
Across all 5 predefined research tasks, no model achieved perfect accuracy, and even the best-performing system, ChatGPT 5, correctly processed less than two-thirds of retracted articles (8 of 15; <xref ref-type="fig" rid="figure1">Figure 1</xref>). By contrast, AI tools marketed specifically for scientific use&#x2014;Consensus, SciSpace, and ScienceOS&#x2014;performed particularly poorly, each failing to produce a single fully correct set of answers. As shown in <xref ref-type="table" rid="table1">Table 1</xref>, intra-tool variability was observed across all GenAI systems, with discordance rates between repeated queries ranging from 2.67% in ChatGPT 4 to 25.33% in Consensus, indicating that even the most consistent model failed to produce fully stable responses. These findings collectively indicate that current GenAI systems cannot reliably detect or flag retracted literature, even when explicitly queried about retraction status.</p><p>The key result of this study is the persistent inability of LLMs to identify retracted papers or to transparently communicate their status to users. This deficiency was observed both in exploratory topic overviews and in direct article queries, suggesting that the problem is systemic rather than situational. As illustrated in <xref ref-type="fig" rid="figure2">Figure 2</xref>, the models most frequently cited retracted studies without warning, and even when asked whether a given article had been retracted, many systems returned incorrect statements. These outcomes highlight a fundamental gap in the factual grounding of current AI tools&#x2014;a limitation that directly undermines their use in scientific research and evidence synthesis. Our analysis cannot determine the exact reason why retracted articles are not marked correctly. Possibilities include insufficient data retrieval from the original databases (including missing source details or incomplete retrieval) or errors in the reasoning of the respective GenAI.
Nevertheless, the scientific end user, who ultimately carries the responsibility for the correctness of his or her article, should be aware of and cautious of this risk.</p><p>The contrast between general-purpose and domain-specific AI systems is notable. While one might expect specialized research tools to perform better, they were instead among the weakest performers. Several of these systems rely on static, proprietary databases that are not regularly synchronized with dynamic resources such as PubMed or CrossMark. This architectural rigidity may explain their inability to capture recent retraction events, especially among newly withdrawn papers included in our sample. Conversely, models such as ChatGPT 5 and Claude, which use broader retrieval mechanisms and more frequent model updates, achieved somewhat higher accuracy but still lacked robustness and transparency. Importantly, the results presented represent a performance snapshot of freely accessible AI tools at the time of data collection and may not generalize to subscription-based versions or to future model iterations, which may incorporate improved retrieval mechanisms or retraction-aware features.</p><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>Accuracy of freely available generative artificial intelligence (AI) tools across the first three predefined literature-search tasks: topic overview, identification of relevant articles, and summarization of a specified retracted publication.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="jmir_v28i1e88766_fig02.png"/></fig></sec><sec id="s4-2"><title>Comparison With Previous Literature</title><p>Our findings extend previous work that has documented the use of retracted literature by GenAI. 
Gu et al [<xref ref-type="bibr" rid="ref11">11</xref>] reported that ChatGPT produced answers referencing retracted oncology studies, without acknowledging their retraction status, in more than 70% of tested prompts. Similarly, Jan et al [<xref ref-type="bibr" rid="ref10">10</xref>] and Ge et al [<xref ref-type="bibr" rid="ref6">6</xref>] have shown that GenAI tools frequently fabricate or misattribute citations during literature summarization tasks. Importantly, these earlier studies typically evaluated individual models or single fields within the scientific literature. The present analysis broadens the evidence base by systematically comparing multiple AI systems across heterogeneous scientific domains, including both general and medical literature. The consistency of failure across all tools underscores that this is not an isolated design flaw but a pervasive, cross-platform limitation in current GenAI architectures.</p><p>The continued citation and reuse of retracted work represent a well-recognized threat to research integrity and, in clinical disciplines, may even interfere with patient safety as illustrated by the infamous article of Wakefield et al [<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref29">29</xref>,<xref ref-type="bibr" rid="ref32">32</xref>]. Our results demonstrate that GenAI tools risk amplifying this long-standing problem by automating the propagation of unreliable information. Particularly concerning is the tendency of GenAI tools to reproduce incorrect content with high linguistic confidence, which may give users a false sense of authority and discourage verification. 
Without built-in mechanisms to cross-check bibliographic metadata or retraction notices, AI-assisted literature synthesis may inadvertently erode rather than enhance scientific reliability.</p></sec><sec id="s4-3"><title>Implications for Research and Clinical Practice</title><p>Given the accelerating adoption of AI systems in academic and clinical workflows, these findings have immediate practical implications. Researchers increasingly use GenAI tools to generate literature summaries, draft background sections, and identify relevant studies. In medicine and health sciences, where evidence accuracy is critical, the unrecognized inclusion of retracted data could distort systematic reviews, meta-analyses, and ultimately clinical recommendations. As such, reliance on unverified AI output in research and clinical contexts may indirectly contribute to patient harm by propagating retracted or unreliable evidence.</p><p>Importantly, expectations regarding the handling of retracted literature may reasonably differ depending on the intended purpose of an AI system. For AI tools explicitly designed to support scientific research or evidence synthesis, excluding retracted publications by default or clearly flagging their status aligns with established principles of research integrity and evidence-based practice. In contrast, for general-purpose AI systems, outright exclusion may not always be appropriate, as retracted articles can still be relevant for historical, methodological, or meta-scientific purposes. In such cases, clearly communicating the retraction status to users may represent a more practical and responsible design approach. Given that retracted information cannot be removed from a trained LLM, handling is more realistically achieved through post-generation checks rather than through changes to the underlying model itself.</p><p>To mitigate this risk, several structural measures should be prioritized. 
AI developers should integrate real-time bibliographic verification pipelines using authoritative sources such as Crossref or PubMed Retraction Notices. AI interfaces intended for academic use should provide transparent confidence indicators or citation provenance, enabling users to identify potentially unreliable results. Journals and peer reviewers should require explicit disclosure of any AI assistance in literature searches and verify cited sources independently. These steps are consistent with recent policy recommendations emphasizing &#x201C;responsible AI use&#x201D; and &#x201C;model interpretability&#x201D; in scientific contexts [<xref ref-type="bibr" rid="ref33">33</xref>]. These recommendations align with JMIR editorial policies and ICMJE guidance, which emphasize transparent disclosure of AI use, author responsibility for content accuracy, and independent verification of cited sources.</p></sec><sec id="s4-4"><title>Strengths and Limitations</title><p>This study offers a pragmatic, reproducible evaluation of AI performance under conditions that mirror real-world research behavior. By using predefined and repeated queries on a diverse set of retracted papers, the design captured both intermodel variability and intramodel inconsistency (<xref ref-type="table" rid="table1">Table 1</xref>). The inclusion of the 10 most-cited and 5 most-recently retracted papers provided a balanced sample, revealing that both historically influential and newly withdrawn studies were equally mishandled.</p><p>Nonetheless, several limitations merit consideration. First, the study was confined to freely accessible versions of each AI tool, excluding subscription models that may have more advanced retrieval capabilities and was taken at a specific point in time. Second, while the test set was broad, it remains a small subset of the retraction landscape and may not generalize to all fields, especially since the majority of articles (13/15, 86.7%) were medical articles. 
While this reflects the intended application of many AI-assisted literature workflows, the predominance of biomedical publications in our sample may limit the generalizability of these findings to other scientific fields, where differing publication cultures, citation structures, and retraction practices could influence AI performance. Third, AI models evolve rapidly; the specific results reported here represent a snapshot of performance as of mid-2025 and may not reflect future iterations. Fourth, given the limited number of retracted articles (n=15), the study was not powered to detect small-to-moderate differences between tools, and nonsignificant results should therefore be interpreted cautiously due to the risk of type II error. Accordingly, between-tool comparisons were considered exploratory and intended to describe observed performance patterns rather than to support confirmatory claims. Finally, the binary scoring approach (correct vs incorrect) captures factual accuracy but does not account for nuances such as ambiguous phrasing (heavily criticized and scrutinized). While distinguishing whether AI systems correctly identify the specific reasons for retraction could provide additional nuance, our analysis intentionally focused on the more fundamental step of detecting and flagging retracted literature, as failure at this stage already represents a critical risk for research integrity.</p><p>These factors should be addressed in future validation studies.</p><p>This study has several methodological flaws. One is that our questioning had to follow a uniform, comparable format. This was achieved by combining the keywords in a single prompt string, which could differ from a prompt by a common user searching for a specific topic. Furthermore, there are multiple reasons why a retracted article was not cited by an AI.
This could be due to the fact that the AI decided against citing it, due to an incomplete or outdated database, due to the fact that this article does not report data of interest, or that there are better articles describing similar aspects. In conclusion, we cannot specifically differentiate between all of those possibilities and therefore decided to create real-world judgment in which we just decide whether this article was included without warning or not. We additionally created an exploratory control group in which we used publication-order matched articles to estimate the ability of each AI to detect a specific article. While the results showed that AI tools found the respective article in up to 90% of cases, we want to highlight the limitation that this extraction was performed in newer iterations of each AI tool.</p></sec><sec id="s4-5"><title>Future Directions</title><p>As GenAI becomes increasingly embedded in research ecosystems, systematic benchmarking and transparent reporting of factual reliability are essential. Future studies should expand sample sizes, include subscription-based models, and explore automated cross-validation with bibliometric databases. In parallel, developers should implement retraction-aware pipelines capable of dynamic metadata synchronization. Collaborative efforts between publishers, AI developers, and database curators could enable &#x201C;AI integrity layers&#x201D; that automatically flag or exclude retracted content before output generation. Ultimately, ensuring that AI systems respect the integrity of the scientific record is both a technical and ethical imperative.</p></sec><sec id="s4-6"><title>Conclusions</title><p>No currently available free-access GenAI system can be considered reliable for detecting or handling retracted literature. Even the best-performing models failed to consistently recognize the retraction status of scientific articles, while research-focused tools performed particularly poorly. 
As AI tools become ubiquitous in the preparation and evaluation of scientific work, independent verification of sources and explicit retraction checks remain indispensable to maintain research integrity.</p></sec></sec></body><back><ack><p>The authors declare that no generative artificial intelligence tools were used in the writing of any portion of this manuscript.</p></ack><notes><sec><title>Funding</title><p>No external funding was received for this study.</p></sec><sec><title>Data Availability</title><p>The data that support the findings of this study are available from the corresponding author (SH) upon reasonable request.</p></sec></notes><fn-group><fn fn-type="con"><p>Contributed to the idea, hypothesis, and study design: HBC, MB, MN, NS, SH, SL</p><p>Contributed to data acquisition: MN, SH, SL</p><p>Assembled the data and performed the statistical analysis: HBC, MB, NS, SL</p><p>Interpreted the data and prepared the first draft of the manuscript: MB, MN, NS, SH, SL</p><p>Commented on, edited, reviewed, and finally approved the last version of the manuscript: HBC, MB, MN, NS, SH, SL</p><p>Authors SL and MN share the first authorship.</p></fn><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">AI</term><def><p>artificial intelligence</p></def></def-item><def-item><term id="abb2">GenAI</term><def><p>generative artificial intelligence</p></def></def-item><def-item><term id="abb3">LLM</term><def><p>large language model</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Golan</surname><given-names>R</given-names> </name><name name-style="western"><surname>Reddy</surname><given-names>R</given-names> </name><name name-style="western"><surname>Muthigi</surname><given-names>A</given-names> </name><name
name-style="western"><surname>Ramasamy</surname><given-names>R</given-names> </name></person-group><article-title>Artificial intelligence in academic writing: a paradigm-shifting technological advance</article-title><source>Nat Rev Urol</source><year>2023</year><month>06</month><volume>20</volume><issue>6</issue><fpage>327</fpage><lpage>328</lpage><pub-id pub-id-type="doi">10.1038/s41585-023-00746-x</pub-id><pub-id pub-id-type="medline">36829078</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Khalifa</surname><given-names>M</given-names> </name><name name-style="western"><surname>Albadawy</surname><given-names>M</given-names> </name></person-group><article-title>Using artificial intelligence in academic writing and research: an essential productivity tool</article-title><source>Computer Methods and Programs in Biomedicine Update</source><year>2024</year><volume>5</volume><fpage>100145</fpage><pub-id pub-id-type="doi">10.1016/j.cmpbup.2024.100145</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Dwivedi</surname><given-names>YK</given-names> </name><name name-style="western"><surname>Kshetri</surname><given-names>N</given-names> </name><name name-style="western"><surname>Hughes</surname><given-names>L</given-names> </name><etal/></person-group><article-title>Opinion Paper: &#x201C;So what if ChatGPT wrote it?&#x201D; Multidisciplinary perspectives on opportunities, challenges and implications of generative conversational AI for research, practice and policy</article-title><source>Int J Inf Manage</source><year>2023</year><month>08</month><volume>71</volume><fpage>102642</fpage><pub-id pub-id-type="doi">10.1016/j.ijinfomgt.2023.102642</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation 
citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Dergaa</surname><given-names>I</given-names> </name><name name-style="western"><surname>Chamari</surname><given-names>K</given-names> </name><name name-style="western"><surname>Zmijewski</surname><given-names>P</given-names> </name><name name-style="western"><surname>Ben Saad</surname><given-names>H</given-names> </name></person-group><article-title>From human writing to artificial intelligence generated text: examining the prospects and potential threats of ChatGPT in academic writing</article-title><source>Biol Sport</source><year>2023</year><month>04</month><volume>40</volume><issue>2</issue><fpage>615</fpage><lpage>622</lpage><pub-id pub-id-type="doi">10.5114/biolsport.2023.125623</pub-id><pub-id pub-id-type="medline">37077800</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>de la Torre-L&#x00F3;pez</surname><given-names>J</given-names> </name><name name-style="western"><surname>Ram&#x00ED;rez</surname><given-names>A</given-names> </name><name name-style="western"><surname>Romero</surname><given-names>JR</given-names> </name></person-group><article-title>Artificial intelligence to automate the systematic review of scientific literature</article-title><source>Computing</source><year>2023</year><month>10</month><volume>105</volume><issue>10</issue><fpage>2171</fpage><lpage>2194</lpage><pub-id pub-id-type="doi">10.1007/s00607-023-01181-x</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ge</surname><given-names>L</given-names> </name><name name-style="western"><surname>Agrawal</surname><given-names>R</given-names> </name><name name-style="western"><surname>Singer</surname><given-names>M</given-names> 
</name><etal/></person-group><article-title>Leveraging artificial intelligence to enhance systematic reviews in health research: advanced tools and challenges</article-title><source>Syst Rev</source><year>2024</year><month>10</month><day>25</day><volume>13</volume><issue>1</issue><fpage>269</fpage><pub-id pub-id-type="doi">10.1186/s13643-024-02682-2</pub-id><pub-id pub-id-type="medline">39456077</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Khlaif</surname><given-names>ZN</given-names> </name><name name-style="western"><surname>Mousa</surname><given-names>A</given-names> </name><name name-style="western"><surname>Hattab</surname><given-names>MK</given-names> </name><etal/></person-group><article-title>The potential and concerns of using AI in scientific research: ChatGPT performance evaluation</article-title><source>JMIR Med Educ</source><year>2023</year><month>09</month><day>14</day><volume>9</volume><fpage>e47049</fpage><pub-id pub-id-type="doi">10.2196/47049</pub-id><pub-id pub-id-type="medline">37707884</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>M&#x00E1;jovsk&#x00FD;</surname><given-names>M</given-names> </name><name name-style="western"><surname>&#x010C;ern&#x00FD;</surname><given-names>M</given-names> </name><name name-style="western"><surname>Kasal</surname><given-names>M</given-names> </name><name name-style="western"><surname>Komarc</surname><given-names>M</given-names> </name><name name-style="western"><surname>Netuka</surname><given-names>D</given-names> </name></person-group><article-title>Artificial intelligence can generate fraudulent but authentic-looking scientific medical articles: pandora&#x2019;s box has been opened</article-title><source>J Med Internet 
Res</source><year>2023</year><month>05</month><day>31</day><volume>25</volume><fpage>e46924</fpage><pub-id pub-id-type="doi">10.2196/46924</pub-id><pub-id pub-id-type="medline">37256685</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Montalbano</surname><given-names>P</given-names> </name></person-group><article-title>Ethical and legal considerations of medical artificial intelligence</article-title><source>Prim Care</source><year>2025</year><month>12</month><volume>52</volume><issue>4</issue><fpage>769</fpage><lpage>779</lpage><pub-id pub-id-type="doi">10.1016/j.pop.2025.07.009</pub-id><pub-id pub-id-type="medline">41110915</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Jan</surname><given-names>R</given-names> </name></person-group><article-title>Examining the reliability of ChatGPT: identifying retracted scientific literature and ensuring accurate citations and references</article-title><source>Impacts of Generative AI on the Future of Research and Education</source><year>2024</year><publisher-name>IGI Global Scientific Publishing</publisher-name><fpage>367</fpage><lpage>392</lpage><pub-id pub-id-type="doi">10.4018/979-8-3693-0884-4.ch014</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gu</surname><given-names>T</given-names> </name><name name-style="western"><surname>Feng</surname><given-names>H</given-names> </name><name name-style="western"><surname>Li</surname><given-names>M</given-names> </name><name name-style="western"><surname>Gu</surname><given-names>W</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>G</given-names> 
</name></person-group><article-title>Alarm: retracted articles on cancer imaging are not only continuously cited by publications but also used by ChatGPT to answer questions</article-title><source>J Adv Res</source><year>2025</year><month>05</month><volume>71</volume><fpage>1</fpage><lpage>3</lpage><pub-id pub-id-type="doi">10.1016/j.jare.2025.03.020</pub-id><pub-id pub-id-type="medline">40086628</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kocyigit</surname><given-names>BF</given-names> </name><name name-style="western"><surname>Okyay</surname><given-names>RA</given-names> </name><name name-style="western"><surname>Seiil</surname><given-names>B</given-names> </name><name name-style="western"><surname>Qumar</surname><given-names>AB</given-names> </name><name name-style="western"><surname>Sumbul</surname><given-names>HE</given-names> </name></person-group><article-title>Analysis of retracted publications on artificial intelligence: trends, ethical concerns, and scientific integrity</article-title><source>J Korean Med Sci</source><year>2025</year><month>11</month><day>17</day><volume>40</volume><issue>44</issue><fpage>e280</fpage><pub-id pub-id-type="doi">10.3346/jkms.2025.40.e280</pub-id><pub-id pub-id-type="medline">41250649</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bakker</surname><given-names>C</given-names> </name><name name-style="western"><surname>Boughton</surname><given-names>S</given-names> </name><name name-style="western"><surname>Faggion</surname><given-names>CM</given-names> </name><name name-style="western"><surname>Fanelli</surname><given-names>D</given-names> </name><name name-style="western"><surname>Kaiser</surname><given-names>K</given-names> </name><name 
name-style="western"><surname>Schneider</surname><given-names>J</given-names> </name></person-group><article-title>Reducing the residue of retractions in evidence synthesis: ways to minimise inappropriate citation and use of retracted data</article-title><source>BMJ Evid Based Med</source><year>2024</year><month>03</month><day>21</day><volume>29</volume><issue>2</issue><fpage>121</fpage><lpage>126</lpage><pub-id pub-id-type="doi">10.1136/bmjebm-2022-111921</pub-id><pub-id pub-id-type="medline">37463764</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="web"><article-title>AI-assisted knowledge gathering and its pitfalls with retracted articles</article-title><source>Open Science Framework (OSF)</source><access-date>2026-04-13</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://doi.org/10.17605/OSF.IO/B6J2W">https://doi.org/10.17605/OSF.IO/B6J2W</ext-link></comment></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bohsas</surname><given-names>H</given-names> </name><name name-style="western"><surname>Alibrahim</surname><given-names>H</given-names> </name><name name-style="western"><surname>Swed</surname><given-names>S</given-names> </name><etal/></person-group><article-title>Prevalence and knowledge of polycystic ovary syndrome (PCOS) and health-related practices among women of Syria: a cross-sectional study</article-title><source>J Psychosom Obstet Gynaecol</source><year>2024</year><month>12</month><volume>45</volume><issue>1</issue><fpage>2318194</fpage><pub-id pub-id-type="doi">10.1080/0167482X.2024.2318194</pub-id><pub-id pub-id-type="medline">38635351</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Estruch</surname><given-names>R</given-names> </name><name 
name-style="western"><surname>Ros</surname><given-names>E</given-names> </name><name name-style="western"><surname>Salas-Salvad&#x00F3;</surname><given-names>J</given-names> </name><etal/></person-group><article-title>Primary prevention of cardiovascular disease with a Mediterranean diet</article-title><source>N Engl J Med</source><year>2013</year><month>04</month><day>4</day><volume>368</volume><issue>14</issue><fpage>1279</fpage><lpage>1290</lpage><pub-id pub-id-type="doi">10.1056/NEJMoa1200303</pub-id><pub-id pub-id-type="medline">23432189</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Fortuna</surname><given-names>F</given-names> </name><name name-style="western"><surname>Gonzalez</surname><given-names>D</given-names> </name><name name-style="western"><surname>Fritzler</surname><given-names>A</given-names> </name><etal/></person-group><article-title>Burnout components, perceived stress and hair cortisol in healthcare professionals during the second wave of COVID 19 pandemic</article-title><source>Sci Rep</source><year>2024</year><month>11</month><day>21</day><volume>14</volume><issue>1</issue><fpage>28828</fpage><pub-id pub-id-type="doi">10.1038/s41598-024-79925-8</pub-id><pub-id pub-id-type="medline">39572609</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Fukuhara</surname><given-names>A</given-names> </name><name name-style="western"><surname>Matsuda</surname><given-names>M</given-names> </name><name name-style="western"><surname>Nishizawa</surname><given-names>M</given-names> </name><etal/></person-group><article-title>RETRACTED: Visfatin: a protein secreted by visceral fat that mimics the effects of 
insulin</article-title><source>Science</source><year>2005</year><month>01</month><day>21</day><volume>307</volume><issue>5708</issue><fpage>426</fpage><lpage>430</lpage><pub-id pub-id-type="doi">10.1126/science.1097243</pub-id><pub-id pub-id-type="medline">15604363</pub-id></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gautret</surname><given-names>P</given-names> </name><name name-style="western"><surname>Lagier</surname><given-names>JC</given-names> </name><name name-style="western"><surname>Parola</surname><given-names>P</given-names> </name><etal/></person-group><article-title>RETRACTED: Hydroxychloroquine and azithromycin as a treatment of COVID-19: results of an open-label non-randomized clinical trial</article-title><source>Int J Antimicrob Agents</source><year>2020</year><month>07</month><volume>56</volume><issue>1</issue><fpage>105949</fpage><pub-id pub-id-type="doi">10.1016/j.ijantimicag.2020.105949</pub-id><pub-id pub-id-type="medline">32205204</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Huang</surname><given-names>C</given-names> </name><name name-style="western"><surname>Huang</surname><given-names>L</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>Y</given-names> </name><etal/></person-group><article-title>6-month consequences of COVID-19 in patients discharged from hospital: a cohort study</article-title><source>Lancet</source><year>2021</year><month>01</month><day>16</day><volume>397</volume><issue>10270</issue><fpage>220</fpage><lpage>232</lpage><pub-id pub-id-type="doi">10.1016/S0140-6736(20)32656-8</pub-id><pub-id pub-id-type="medline">33428867</pub-id></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="journal"><person-group 
person-group-type="author"><name name-style="western"><surname>Jiang</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Jahagirdar</surname><given-names>BN</given-names> </name><name name-style="western"><surname>Reinhardt</surname><given-names>RL</given-names> </name><etal/></person-group><article-title>Pluripotency of mesenchymal stem cells derived from adult marrow</article-title><source>Nature New Biol</source><year>2002</year><month>07</month><day>4</day><volume>418</volume><issue>6893</issue><fpage>41</fpage><lpage>49</lpage><pub-id pub-id-type="doi">10.1038/nature00870</pub-id><pub-id pub-id-type="medline">12077603</pub-id></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lesn&#x00E9;</surname><given-names>S</given-names> </name><name name-style="western"><surname>Koh</surname><given-names>MT</given-names> </name><name name-style="western"><surname>Kotilinek</surname><given-names>L</given-names> </name><etal/></person-group><article-title>A specific amyloid-beta protein assembly in the brain impairs memory</article-title><source>Nature New Biol</source><year>2006</year><month>03</month><day>16</day><volume>440</volume><issue>7082</issue><fpage>352</fpage><lpage>357</lpage><pub-id pub-id-type="doi">10.1038/nature04533</pub-id><pub-id pub-id-type="medline">16541076</pub-id></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Mohammadniaei</surname><given-names>M</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>M</given-names> </name><name name-style="western"><surname>Ashley</surname><given-names>J</given-names> </name><etal/></person-group><article-title>A non-enzymatic, isothermal strand displacement and amplification assay for rapid detection of SARS-CoV-2 
RNA</article-title><source>Nat Commun</source><year>2021</year><month>08</month><day>24</day><volume>12</volume><issue>1</issue><fpage>5089</fpage><pub-id pub-id-type="doi">10.1038/s41467-021-25387-9</pub-id><pub-id pub-id-type="medline">34429424</pub-id></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Morisky</surname><given-names>DE</given-names> </name><name name-style="western"><surname>Ang</surname><given-names>A</given-names> </name><name name-style="western"><surname>Krousel-Wood</surname><given-names>M</given-names> </name><name name-style="western"><surname>Ward</surname><given-names>HJ</given-names> </name></person-group><article-title>Predictive validity of a medication adherence measure in an outpatient setting</article-title><source>J Clin Hypertens (Greenwich)</source><year>2008</year><month>05</month><volume>10</volume><issue>5</issue><fpage>348</fpage><lpage>354</lpage><pub-id pub-id-type="doi">10.1111/j.1751-7176.2008.07572.x</pub-id><pub-id pub-id-type="medline">18453793</pub-id></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Papoutsis</surname><given-names>A</given-names> </name><name name-style="western"><surname>Borody</surname><given-names>T</given-names> </name><name name-style="western"><surname>Dolai</surname><given-names>S</given-names> </name><etal/></person-group><article-title>Detection of SARS-CoV-2 from patient fecal samples by whole genome sequencing</article-title><source>Gut Pathog</source><year>2021</year><month>01</month><day>30</day><volume>13</volume><issue>1</issue><fpage>7</fpage><pub-id pub-id-type="doi">10.1186/s13099-021-00398-5</pub-id><pub-id pub-id-type="medline">33516247</pub-id></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="journal"><person-group 
person-group-type="author"><name name-style="western"><surname>Pearton</surname><given-names>SJ</given-names> </name><name name-style="western"><surname>Norton</surname><given-names>DP</given-names> </name><name name-style="western"><surname>Ip</surname><given-names>K</given-names> </name><name name-style="western"><surname>Heo</surname><given-names>YW</given-names> </name><name name-style="western"><surname>Steiner</surname><given-names>T</given-names> </name></person-group><article-title>RETRACTED: recent progress in processing and properties of ZnO</article-title><source>Prog Mater Sci</source><year>2005</year><month>03</month><volume>50</volume><issue>3</issue><fpage>293</fpage><lpage>340</lpage><pub-id pub-id-type="doi">10.1016/j.pmatsci.2004.04.001</pub-id></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Shi</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Ye</surname><given-names>P</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>C</given-names> </name><name name-style="western"><surname>Yang</surname><given-names>K</given-names> </name><name name-style="western"><surname>Guo</surname><given-names>J</given-names> </name></person-group><article-title>RETRACTED: surface hydrophilic modification for chip of centrifugal microfluidic immunoassay system</article-title><source>Micromachines (Basel)</source><year>2022</year><month>05</month><day>26</day><volume>13</volume><issue>6</issue><fpage>831</fpage><pub-id pub-id-type="doi">10.3390/mi13060831</pub-id><pub-id pub-id-type="medline">35744444</pub-id></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Taylor</surname><given-names>DD</given-names> </name><name 
name-style="western"><surname>Gercel-Taylor</surname><given-names>C</given-names> </name></person-group><article-title>MicroRNA signatures of tumor-derived exosomes as diagnostic biomarkers of ovarian cancer</article-title><source>Gynecol Oncol</source><year>2008</year><month>07</month><volume>110</volume><issue>1</issue><fpage>13</fpage><lpage>21</lpage><pub-id pub-id-type="doi">10.1016/j.ygyno.2008.04.033</pub-id><pub-id pub-id-type="medline">18589210</pub-id></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wakefield</surname><given-names>A</given-names> </name><name name-style="western"><surname>Murch</surname><given-names>S</given-names> </name><name name-style="western"><surname>Anthony</surname><given-names>A</given-names> </name><etal/></person-group><article-title>RETRACTED: Ileal-lymphoid-nodular hyperplasia, non-specific colitis, and pervasive developmental disorder in children</article-title><source>Lancet</source><year>1998</year><month>02</month><volume>351</volume><issue>9103</issue><fpage>637</fpage><lpage>641</lpage><pub-id pub-id-type="doi">10.1016/S0140-6736(97)11096-0</pub-id></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="web"><article-title>Top 10 most highly cited retracted papers</article-title><source>Retraction Watch</source><access-date>2026-04-13</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://retractionwatch.com/the-retraction-watch-leaderboard/top-10-most-highly-cited-retracted-papers/">https://retractionwatch.com/the-retraction-watch-leaderboard/top-10-most-highly-cited-retracted-papers/</ext-link></comment></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="web"><article-title>Retraction watch data</article-title><source>GitLab</source><access-date>2026-04-13</access-date><comment><ext-link ext-link-type="uri" 
xlink:href="https://gitlab.com/crossref/retraction-watch-data">https://gitlab.com/crossref/retraction-watch-data</ext-link></comment></nlm-citation></ref><ref id="ref32"><label>32</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Teixeira da Silva</surname><given-names>JA</given-names> </name><name name-style="western"><surname>Santos&#x2010;d&#x2019;Amorim</surname><given-names>K</given-names> </name><name name-style="western"><surname>Bornemann&#x2010;Cimenti</surname><given-names>H</given-names> </name></person-group><article-title>The citation of retracted papers and impact on the integrity of the scientific biomedical literature</article-title><source>Learn Publ</source><year>2025</year><month>04</month><volume>38</volume><issue>2</issue><fpage>e1667</fpage><pub-id pub-id-type="doi">10.1002/leap.1667</pub-id></nlm-citation></ref><ref id="ref33"><label>33</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>AllahRakha</surname><given-names>N</given-names> </name></person-group><article-title>UNESCO&#x2019;s AI ethics principles: challenges and opportunities</article-title><source>Irshad J Law and Policy</source><year>2024</year><volume>2</volume><issue>9</issue><fpage>24</fpage><lpage>36</lpage><pub-id pub-id-type="doi">10.59022/ijlp.225</pub-id></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>Supplementary digital content.</p><media xlink:href="jmir_v28i1e88766_app1.docx" xlink:title="DOCX File, 81 KB"/></supplementary-material></app-group></back></article>