<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id><journal-id journal-id-type="publisher-id">jmir</journal-id><journal-id journal-id-type="index">1</journal-id><journal-title>Journal of Medical Internet Research</journal-title><abbrev-journal-title>J Med Internet Res</abbrev-journal-title><issn pub-type="epub">1438-8871</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v27i1e76598</article-id><article-id pub-id-type="doi">10.2196/76598</article-id><article-categories><subj-group subj-group-type="heading"><subject>Original Paper</subject></subj-group></article-categories><title-group><article-title>Using ChatGPT-4 for Lay Summarization in Prostate Cancer Research to Advance Patient-Centered Communication: Large-Scale Generative AI Performance Evaluation</article-title></title-group><contrib-group><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Rinderknecht</surname><given-names>Emily</given-names></name><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Engelmann</surname><given-names>Simon U</given-names></name><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Saberi</surname><given-names>Veronika</given-names></name><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name 
name-style="western"><surname>Kirschner</surname><given-names>Clemens</given-names></name><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Kravchuk</surname><given-names>Anton P</given-names></name><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Schmelzer</surname><given-names>Anna</given-names></name><xref ref-type="aff" rid="aff4">4</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Breyer</surname><given-names>Johannes</given-names></name><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Go&#x00DF;ler</surname><given-names>Christopher</given-names></name><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Mayr</surname><given-names>Roman</given-names></name><xref ref-type="aff" rid="aff5">5</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Gilfrich</surname><given-names>Christian</given-names></name><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Burger</surname><given-names>Maximilian</given-names></name><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>von Winning</surname><given-names>Dominik</given-names></name><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Borgmann</surname><given-names>Hendrik</given-names></name><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff6">6</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>W&#x00FC;lfing</surname><given-names>Christian</given-names></name><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" 
rid="aff7">7</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Merseburger</surname><given-names>Axel S</given-names></name><xref ref-type="aff" rid="aff8">8</xref></contrib><contrib contrib-type="author" equal-contrib="yes"><name name-style="western"><surname>Haas</surname><given-names>Maximilian</given-names></name><xref ref-type="aff" rid="aff2">2</xref><xref ref-type="fn" rid="equal-contrib1">*</xref></contrib><contrib contrib-type="author" equal-contrib="yes"><name name-style="western"><surname>May</surname><given-names>Matthias</given-names></name><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff3">3</xref><xref ref-type="fn" rid="equal-contrib1">*</xref></contrib></contrib-group><aff id="aff1"><institution>Working Group on Artificial Intelligence and Digitalization of the German Society of Urology</institution><country>Germany</country></aff><aff id="aff2"><institution>Department of Urology, University of Regensburg, Caritas St Josef Medical Center</institution><addr-line>Landshuter Street 65</addr-line><addr-line>Regensburg</addr-line><country>Germany</country></aff><aff id="aff3"><institution>Department of Urology, St. 
Elisabeth Hospital Straubing</institution><addr-line>Straubing</addr-line><country>Germany</country></aff><aff id="aff4"><institution>Department of Urology, Nuremberg General Hospital, Paracelsus Medical University</institution><addr-line>Nuremberg</addr-line><country>Germany</country></aff><aff id="aff5"><institution>Department of Urology, University Hospital Augsburg</institution><addr-line>Augsburg</addr-line><country>Germany</country></aff><aff id="aff6"><institution>Department of Urology, Faculty of Health Sciences Brandenburg, Brandenburg Medical School Theodor Fontane</institution><addr-line>Brandenburg</addr-line><country>Germany</country></aff><aff id="aff7"><institution>Department of Urology, Asklepios Klinik Altona</institution><addr-line>Hamburg</addr-line><country>Germany</country></aff><aff id="aff8"><institution>Department of Urology, University Hospital Schleswig-Holstein, Campus L&#x00FC;beck</institution><addr-line>L&#x00FC;beck</addr-line><country>Germany</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Cahill</surname><given-names>Naomi</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Xu</surname><given-names>Chuan</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Rakedzon</surname><given-names>Tzipora</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Emily Rinderknecht, Department of Urology, University of Regensburg, Caritas St Josef Medical Center, Landshuter Street 65, Regensburg, 93053, Germany, 49 9417821000; <email>erinderknecht@csj.de</email></corresp><fn fn-type="equal" id="equal-contrib1"><label>*</label><p>these authors contributed equally</p></fn></author-notes><pub-date pub-type="collection"><year>2025</year></pub-date><pub-date 
pub-type="epub"><day>19</day><month>11</month><year>2025</year></pub-date><volume>27</volume><elocation-id>e76598</elocation-id><history><date date-type="received"><day>27</day><month>04</month><year>2025</year></date><date date-type="rev-recd"><day>21</day><month>09</month><year>2025</year></date><date date-type="accepted"><day>22</day><month>09</month><year>2025</year></date></history><copyright-statement>&#x00A9; Emily Rinderknecht, Simon U Engelmann, Veronika Saberi, Clemens Kirschner, Anton P Kravchuk, Anna Schmelzer, Johannes Breyer, Christopher Go&#x00DF;ler, Roman Mayr, Christian Gilfrich, Maximilian Burger, Dominik von Winning, Hendrik Borgmann, Christian W&#x00FC;lfing, Axel S Merseburger, Maximilian Haas, Matthias May. Originally published in the Journal of Medical Internet Research (<ext-link ext-link-type="uri" xlink:href="https://www.jmir.org">https://www.jmir.org</ext-link>), 19.11.2025. </copyright-statement><copyright-year>2025</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research (ISSN 1438-8871), is properly cited. 
The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://www.jmir.org/">https://www.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://www.jmir.org/2025/1/e76598"/><abstract><sec><title>Background</title><p>The increasing volume and complexity of biomedical literature pose challenges for making scientific knowledge accessible to lay audiences. Lay summaries, now widely encouraged or required by journals, aim to bridge this gap by promoting health literacy, patient engagement, and public trust. However, many are written by scientists without formal training in plain-language communication, often resulting in limited clarity, readability, and consistency. Generative large language models such as ChatGPT-4 offer a scalable opportunity to support lay summary creation, though their effectiveness within specific clinical domains has not been systematically evaluated at scale.</p></sec><sec><title>Objective</title><p>This study aimed to assess ChatGPT-4&#x2019;s performance in generating lay summaries for prostate cancer studies. A secondary objective was to evaluate how prompt design influences summary quality, aiming to provide practical guidance for the use of generative artificial intelligence (AI) in scientific publishing.</p></sec><sec sec-type="methods"><title>Methods</title><p>A total of 204 consecutive articles on prostate cancer were extracted from a high-ranking oncology journal mandating lay summaries. Each abstract was processed with ChatGPT-4 using 2 prompts: a simple prompt based on the journal&#x2019;s guidelines and an extended prompt refined to improve readability. 
AI-generated and original summaries were evaluated using 3 criteria: readability (Flesch-Kincaid Reading Ease [FKRE]), factual accuracy (5-point Likert scale, blinded rating by 2 clinical experts), and compliance with word count instructions (120&#x2010;150 words). Summaries were classified as high-quality as a composite outcome if they met all 3 benchmarks: FKRE &#x2265;30, accuracy &#x2265;4 from both raters, and word count within range. Statistical comparisons used Wilcoxon signed-rank and paired 2-tailed <italic>t</italic> tests (<italic>P</italic>&#x003C;.05).</p></sec><sec sec-type="results"><title>Results</title><p>ChatGPT-4-generated lay summaries showed an improvement in readability compared to human-written versions, with the extended prompt achieving higher scores than the simple prompt (median FKRE: extended prompt 47, IQR 42-56; simple prompt 36, IQR 29-43; original 20, IQR 9.5&#x2010;29; <italic>P</italic>&#x003C;.001). Factual accuracy was higher for the AI-generated lay summaries compared to originals (median factual accuracy score: extended prompt 5, IQR 5-5; simple prompt 5, IQR 5-5; original 5, IQR 4-5; <italic>P</italic>&#x003C;.001) in this dataset. Compliance with word count instructions was greater for both AI-generated summaries in comparison to originals (wrong number of words: extended prompt 39 [19%], simple prompt 40 [20%], original 140 [69%]; <italic>P</italic>&#x003C;.001). Between simple and extended prompts, there were no significant differences in accuracy (<italic>P</italic>=.53) and word count compliance (<italic>P</italic>=.87). 
The proportion rated as high-quality was 79.4% for the extended prompt, 54.9% for the simple prompt, and 5.4% for original summaries (<italic>P</italic>&#x003C;.001).</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>With optimized prompting, ChatGPT-4 produced lay summaries that, on average, scored higher than author-written versions in readability, factual accuracy, and structural compliance within our dataset. These results support integrating generative AI into editorial workflows to improve science communication for nonexpert audiences. Limitations include focus on a single clinical domain and journal, and absence of layperson evaluation.</p></sec></abstract><kwd-group><kwd>health literacy</kwd><kwd>large language models</kwd><kwd>prompt engineering</kwd><kwd>digital health communication</kwd><kwd>patient engagement</kwd><kwd>artificial intelligence in publishing</kwd><kwd>readability assessment</kwd><kwd>human-AI collaboration</kwd><kwd>cancer information accessibility</kwd><kwd>natural language generation</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>In recent years, the inclusion of patient voices in the design, communication, and dissemination of medical research has gained prominence as a central tenet of participatory health care. Meaningful involvement of patients and caregivers is increasingly recognized not only as an ethical imperative but also as a key determinant of research relevance, knowledge translation, and patient empowerment [<xref ref-type="bibr" rid="ref1">1</xref>-<xref ref-type="bibr" rid="ref5">5</xref>]. 
Central to this evolving paradigm is the availability of scientific content in formats that are understandable and accessible to laypersons.</p><p>Lay summaries (also referred to as plain language summaries in some publishing contexts) are an increasingly common tool intended to bridge the gap between complex biomedical research and the informational needs of patients and the wider public. In this paper, we use the term lay summary as the preferred descriptor, while acknowledging plain language summary as a recognized synonym. In response to regulatory frameworks [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref7">7</xref>] and patient engagement initiatives, several publishers and institutions have implemented policies requiring authors to provide summaries in language that is free from jargon and suitable for non-specialist audiences [<xref ref-type="bibr" rid="ref8">8</xref>,<xref ref-type="bibr" rid="ref9">9</xref>]. The European Union&#x2019;s Clinical Trials Regulation (EU No 536/2014), for example, explicitly mandates that clinical trial results be made available in a lay-accessible format [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref7">7</xref>].</p><p>Despite such mandates, the quality of lay summaries remains variable. Prior studies have identified substantial deficits in readability, coherence, and alignment with health literacy standards [<xref ref-type="bibr" rid="ref10">10</xref>-<xref ref-type="bibr" rid="ref14">14</xref>]. Even with detailed guidance, translating complex scientific content into clear, accurate, and engaging language for nonexpert audiences remains a considerable challenge [<xref ref-type="bibr" rid="ref10">10</xref>-<xref ref-type="bibr" rid="ref15">15</xref>].</p><p>Recent advances in generative artificial intelligence (AI) offer promising avenues for addressing these challenges. 
Large language models (LLMs), most notably ChatGPT-4, have demonstrated remarkable capabilities in natural language generation, including summarization, paraphrasing, and simplification of complex content [<xref ref-type="bibr" rid="ref16">16</xref>-<xref ref-type="bibr" rid="ref21">21</xref>]. Their potential to generate lay-accessible summaries&#x2014;when appropriately prompted&#x2014;may alleviate the burden on researchers and improve the consistency and accessibility of scientific communication. Recent scholarship further illustrates the potential of AI-assisted tools in science communication. For example, Markowitz [<xref ref-type="bibr" rid="ref22">22</xref>] shows that AI can improve the clarity of complex information and positively influence perceptions of science, while &#x0160;uto Pavi&#x010D;i&#x0107; et al [<xref ref-type="bibr" rid="ref23">23</xref>] provide empirical evidence that ChatGPT can enhance plain language summaries of Cochrane oncology reviews.</p><p>In the field of oncology, the journal <italic>Cancers</italic> provides a uniquely structured environment for evaluating such technologies. As one of the few journals that consistently requires lay summaries for all accepted papers, it offers a standardized editorial framework against which AI-generated outputs can be compared [<xref ref-type="bibr" rid="ref8">8</xref>]. In this context, this study aimed to evaluate the performance of ChatGPT-4 in generating lay summaries of prostate cancer research articles, comparing them to human-written counterparts in terms of readability, factual accuracy, and adherence to editorial standards (operationalized as compliance with word count requirements).</p></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Article Selection</title><p>This study includes consecutive articles on the topic of prostate cancer published in <italic>Cancers</italic> in 2024. 
To identify the articles, the PubMed database was searched using the search string:</p><p>&#x201C;Cancers (Basel)&#x201D;[Journal] AND (&#x201C;prostate cancer&#x201D; OR &#x201C;prostate neoplasm&#x201D; OR &#x201C;prostate carcinoma&#x201D;).</p><p>All articles with an EPUB date between January 1, 2024, and December 31, 2024, were included. Articles were excluded if they were not related to prostate cancer, had an EPUB date outside the defined time frame, lacked an abstract, original lay summary, or keywords, or if they were not classified as either original research articles or reviews.</p><p>Although this study does not involve clinical implementation, it applies key principles articulated in the Developmental and Exploratory Clinical Investigations of Decision-Support Systems driven by Artificial Intelligence (DECIDE-AI) framework, including transparency, structured prompt design, and methodological rigor, thereby aligning with the early evaluative steps required for responsible, patient-centered AI applications [<xref ref-type="bibr" rid="ref24">24</xref>]. The DECIDE-AI checklist (<xref ref-type="supplementary-material" rid="app5">Checklist 1</xref>) was selected because it specifically addresses the methodological and ethical challenges associated with the early-stage evaluation of AI-driven decision support systems. As a formative assessment of generative AI in the context of patient-facing communication, this study reflects the type of preparatory work envisioned by DECIDE-AI prior to real-world deployment [<xref ref-type="bibr" rid="ref24">24</xref>].</p><p>Article characteristics concerning the affiliation of the corresponding author and type of article (original research vs meta-analysis or review) were extracted from the articles&#x2019; metadata. Article classification into the categories diagnostic, therapy, both, or others was conducted manually and independently by 3 experts (ER, MH, and MM); discrepancies were resolved by joint consensus. 
Similarly, articles were manually classified into basic, clinical, or translational science, based on predefined criteria considering the study&#x2019;s primary focus, methodology, and translational relevance. Basic science studies investigate molecular, cellular, or genetic mechanisms typically using in vitro or animal models; clinical science studies involve patients or patient-derived data focusing on diagnosis, treatment, or outcomes; and translational science studies bridge both by applying mechanistic insights to patient-oriented investigations such as biomarker validation or early-phase therapeutic studies.</p></sec><sec id="s2-2"><title>Development of Standardized Prompts for Data Input Into ChatGPT-4</title><p>ChatGPT-4 was selected as the LLM because of its widespread use and in accordance with methodologies applied in previous studies [<xref ref-type="bibr" rid="ref19">19</xref>]. We created a simple prompt to instruct ChatGPT-4 to create a layperson summary based on the abstract, keywords, and title of the paper, adhering to the guidelines provided by the journal. Subsequently, an extended prompt was developed with the aim of optimizing the lay summary in line with the guidelines outlined in the Good Lay Summary Practice Guidelines [<xref ref-type="bibr" rid="ref7">7</xref>]. The goal was to ensure that the lay summary was comprehensible to readers with a reading level equivalent to sixth grade, without compromising factual accuracy or disregarding the journal&#x2019;s requirements. The prompts are depicted in <xref ref-type="other" rid="box1">Textbox 1</xref>. More detailed information on prompt development and refinement is included in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>. Each article was processed using both prompts, with a new ChatGPT-4 session initiated for each input.</p><boxed-text id="box1"><title> ChatGPT-4 input prompts for creating a layperson summary. 
Differences are highlighted in italics.</title><p><bold>Simple prompt</bold></p><p>Dear ChatGPT-4o,</p><p>I kindly request your assistance in crafting a Simple Summary as part of a scientific study. The Simple Summary must adhere to the following guidelines:</p><p>It should be written in one paragraph, in layman&#x2019;s terms, to explain why the research is being suggested, what the authors aim to achieve, and how the findings from this research may impact the research community. Please use as few abbreviations as possible, and do not cite references in the Simple Summary. The Simple Summary must not exceed 150 words.</p><p>To provide you with the necessary context for creating this Simple Summary, I will supply you with the study title, a scientifically accurate abstract (not in layman&#x2019;s terms), and the relevant keywords.</p><p>Study title: &#x201C;&#x2026;&#x201D;</p><p>Scientifically accurate abstract: &#x201C;&#x2026;&#x201D;</p><p>Keywords: &#x201C;&#x2026;&#x201D;</p><p>Please note: Summarize this unstructured abstract (simple summary) in lay language, highlighting the study purpose, methods, key findings, and practical importance of these findings for the general public. Additionally, be aware that the Simple Summary must not exceed 150 words, but it should make the most of this limit.</p><p/><p><bold>Extended prompt</bold></p><p>Dear ChatGPT-4o,</p><p>I kindly request your assistance in crafting a Simple Summary as part of a scientific study. The Simple Summary must adhere to the following guidelines:</p><p>It should be written in one paragraph, in layman&#x2019;s terms, to explain why the research is being suggested, what the authors aim to achieve, and how the findings from this research may impact the research community. Please use as few abbreviations as possible, and do not cite references in the Simple Summary. 
The Simple Summary must not exceed 150 words.</p><p><italic>The Simple Summary should be crafted with a focus on maximizing readability, aiming for the highest possible Flesch-Kincaid Reading Ease score.</italic></p><p>To provide you with the necessary context for creating this Simple Summary, I will supply you with the study title, a scientifically accurate abstract (not in layman&#x2019;s terms), and the relevant keywords.</p><p>Study title: &#x201C;&#x2026;&#x201D;</p><p>Scientifically accurate abstract: &#x201C;&#x2026;&#x201D;</p><p>Keywords: &#x201C;&#x2026;&#x201D;</p><p>Please note: Summarize this unstructured abstract (simple summary) in lay language <italic>at a 6th grade reading level</italic>, highlighting the study purpose, methods, key findings, and practical importance of these findings for the general public. Additionally, be aware that the Simple Summary must not exceed 150 words, but it should make the most of this limit.</p></boxed-text></sec><sec id="s2-3"><title>Readability Assessment</title><p>Readability indices, grade-level indicators, and text metrics were automatically calculated for the original lay summary, the ChatGPT-4 simple prompt summary, and the ChatGPT-4 extended prompt summary using the Readability Test Tool provided by WebFx (WebFx, Inc) [<xref ref-type="bibr" rid="ref25">25</xref>] as previously described [<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref18">18</xref>,<xref ref-type="bibr" rid="ref19">19</xref>]. The assessment encompassed multiple validated readability indices, including the Flesch-Kincaid Reading Ease (FKRE), Flesch-Kincaid Grade Level (FKGL), Gunning Fog Score, Simple Measure of Gobbledygook Index, Coleman-Liau Index, and Automated Readability Index. In addition, text metrics were analyzed, comprising the number of sentences, total word count, count and proportion of complex words, average words per sentence, and average syllables per word. 
The readability assessment was conducted between February 1, 2025, and March 31, 2025.</p></sec><sec id="s2-4"><title>Factual Accuracy Assessment</title><p>The factual accuracy of the lay summaries was evaluated in a blinded manner by 2 independent raters (JB and MM), both of whom possess sufficient scientific expertise (authors of &#x003E;100 peer-reviewed scientific articles). The assessment was conducted using a 5-point Likert scale to evaluate the alignment with the abstract and keywords, ranging from 1=very poor to 5=excellent. <xref ref-type="table" rid="table1">Table 1</xref> outlines the specific criteria used for the evaluation. Both quality assessments were incorporated into the overall quality assessment of the lay summaries&#x2019; performance. For the graphical representation of results, only the factual accuracy ratings from rater 1 were considered. To reduce evaluation bias, all summaries were anonymized prior to review. Evaluators were blinded to both the origin (human vs AI-generated) and the prompt type. The order of presentation was randomized for each reviewer. To ensure transparency, examples of lay summaries&#x2014;with their corresponding ratings (by rater 1, MM) and explanations for the assigned scores&#x2014;are provided in <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>.</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Description of the 5-point Likert scale used for the evaluation of the factual accuracy of the lay summaries.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Score</td><td align="left" valign="bottom">Explanation</td></tr></thead><tbody><tr><td align="left" valign="top">1=very poor</td><td align="left" valign="top">The lay summary contains significant factual errors and diverges substantially from the scientific abstract. 
Essential information is missing, which severely compromises its clarity and accuracy.</td></tr><tr><td align="left" valign="top">2=poor</td><td align="left" valign="top">The lay summary has multiple factual inaccuracies and diverges in certain areas from the scientific abstract. Some key information is missing, diminishing its overall effectiveness.</td></tr><tr><td align="left" valign="top">3=acceptable</td><td align="left" valign="top">The lay summary is mostly accurate but contains minor factual inaccuracies or omissions. It generally aligns with the scientific abstract, though some details could be more precise or comprehensive.</td></tr><tr><td align="left" valign="top">4=good</td><td align="left" valign="top">The lay summary is factually accurate and largely consistent with the scientific abstract. Only minor, nonessential information may be missing or slightly simplified.</td></tr><tr><td align="left" valign="top">5=excellent</td><td align="left" valign="top">The lay summary is completely accurate, fully aligns with the scientific abstract, and includes all essential information. It conveys the content clearly and effectively, without omitting any important details.</td></tr></tbody></table></table-wrap></sec><sec id="s2-5"><title>Adherence to Journal Instructions Assessment</title><p>Adherence to journal instructions was operationalized as compliance with the required summary length of 120&#x2010;150 words.</p></sec><sec id="s2-6"><title>Overall Quality Assessment</title><p>To facilitate an integrative evaluation of lay summary quality, a composite score was introduced that incorporated the 3 primary outcome measures: readability, factual accuracy, and adherence to journal instructions (operationalized solely as compliance with the required summary length). High-quality lay summaries were defined using a composite threshold of FKRE&#x2265;30, factual accuracy&#x2265;4 (defined by 2 content assessments), and word count between 120 and 150 words. 
The FKRE cut-off of &#x2265;30 was chosen as a pragmatic boundary informed by Flesch&#x2019;s original classification distinguishing scientific from non-scientific texts and by general health literacy recommendations that patient-directed materials should aim for a sixth- to eighth-grade reading level. While some frameworks suggest FKRE&#x2265;40 as a stricter benchmark for lay accessibility, we adopted &#x2265;30 to capture the range of readability levels realistically encountered in oncology communication [<xref ref-type="bibr" rid="ref26">26</xref>-<xref ref-type="bibr" rid="ref28">28</xref>].</p><p>The factual accuracy threshold of &#x2265;4 was selected to denote minimal deviation from the source text, consistent with prior LLM assessment protocols [<xref ref-type="bibr" rid="ref19">19</xref>].</p><p>The word count range of 120 to 150 words reflected the editorial requirements of <italic>Cancers</italic>, the journal that provided the testbed for this evaluation [<xref ref-type="bibr" rid="ref8">8</xref>].</p><p>If these 3 criteria were not met, a scaling was applied based on the definitions outlined in <xref ref-type="table" rid="table2">Table 2</xref>. The overall quality assessment represents an exploratory composite measure and was not defined as a primary outcome.</p><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Overall quality assessment of the lay summaries. 
Exploratory composite measure integrating the 3 measures: readability, factual accuracy, and correct text length.</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="top">Measure</td><td align="left" valign="top">Scaling<sup><xref ref-type="table-fn" rid="table2fn1">a</xref></sup></td></tr><tr><td align="left" valign="bottom" colspan="2">Readability</td></tr></thead><tbody><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>FKRE<sup><xref ref-type="table-fn" rid="table2fn2">b</xref></sup>&#x003C;30</td><td align="left" valign="top">1 point</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>FKRE&#x003C;20</td><td align="left" valign="top">2 points</td></tr><tr><td align="left" valign="top" colspan="2">Factual accuracy</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>One content assessment &#x003C;4</td><td align="left" valign="top">1 point</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Both content assessments &#x003C;4</td><td align="left" valign="top">2 points</td></tr><tr><td align="left" valign="top" colspan="2">Correct text length</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Text length &#x003C;120 words</td><td align="left" valign="top">1 point</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Text length &#x003E;150 words</td><td align="left" valign="top">1 point</td></tr></tbody></table><table-wrap-foot><fn id="table2fn1"><p><sup>a</sup>Overall quality of the lay summaries: 0 point (high quality), 1-2 points (minor limitations), 3 points (moderate 
limitations), and 4-5 points (major limitations).</p></fn><fn id="table2fn2"><p><sup>b</sup>FKRE: Flesch-Kincaid Reading Ease.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s2-7"><title>Statistical Analysis</title><p>Statistical analyses were performed using SPSS (version 29.0; IBM Corp). Normality of distribution was assessed using the Shapiro-Wilk test (data available upon request). Descriptive statistics were reported as frequencies or as medians with IQR, as appropriate. To compare the different types of lay summaries (original author-provided summaries vs ChatGPT-4 simple prompt vs ChatGPT-4 extended prompt), paired 2-tailed <italic>t</italic> tests were applied for normally distributed continuous variables, while the Wilcoxon signed-rank test was used for nonnormally distributed or ordinal data. Interrater reliability for factual accuracy ratings was evaluated using the Cohen &#x03BA; coefficient. Differences between articles from different topic categories (clinical science, basic science, and translational science) were initially assessed using the Kruskal-Wallis test. In cases where significant overall differences were observed, pairwise post hoc comparisons were conducted using the Dunn test with Bonferroni correction. A <italic>P</italic> value &#x003C;.05 was considered statistically significant. All tests were 2-tailed. Visualizations were generated using R (R Foundation for Statistical Computing).</p></sec><sec id="s2-8"><title>Ethical Considerations</title><p>All journal content used in this study was exclusively obtained from publicly accessible sources. The use of publicly accessible abstracts for scientific analysis complies with the principles of &#x201C;fair use&#x201D; as defined by the US Copyright Act (17 US Code &#x00A7; 107) and the corresponding provisions of the German Copyright Act (UrhG, &#x00A7; 51).
All referenced materials have been duly cited and acknowledged in accordance with academic standards (<xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref>). Although the study only involved public data and no human participants, a positive ethical approval was obtained from the Ethics Committee of the University of Regensburg (UKR-EK-24-3835-104). In our study setting, obtaining informed consent was not required. The use of ChatGPT-4 was subject to internal governance procedures, including documentation of prompt engineering and blinded human evaluation to mitigate bias. All expert raters involved in this study were transparently identified, including their academic qualifications, institutional affiliations, and roles within the project. Ethical aspects concerning the use of generative AI in medical and scientific communication were carefully considered. Potential limitations, risks, and implications related to AI-assisted content generation were addressed where relevant and are discussed in detail in the respective sections of the paper. All prompts and outputs were archived locally in structured, version-controlled Microsoft Excel files that were accessible only to the research team, thereby safeguarding integrity and enabling retrospective auditing. The complete set of prompts and all outputs are provided in <xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref> to ensure transparency and reproducibility. Moreover, we strived for maximum transparency in the presentation of our methodology, including data sources, analytical procedures, and reviewer involvement.</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title>Article Characteristics</title><p>From January 1, 2024, to December 31, 2024, a total of 229 articles were screened. A total of 23 articles (10%) were excluded because they were not primarily related to prostate cancer. 
Two (0.87%) additional articles were excluded as they were neither classified as original research articles nor reviews, consequently lacking a lay summary. This resulted in the inclusion of 204 articles (<xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref>).</p><p>Of the 204 articles, 60 (29%) focused on prostate cancer diagnostics, 79 (39%) on prostate cancer therapy, and 14 (6.9%) covered both prostate cancer diagnostics and therapy. The remaining 51 (25%) articles addressed other topics. Accordingly, 101 (50%) articles were categorized as clinical research, 36 (18%) as basic research, and 67 (33%) as translational research. In total, 123 (60%) were original research articles, while 81 (40%) were meta-analyses or review articles.</p><p>The corresponding authors of 96 (47%) articles were affiliated with institutions in Europe, of 68 (33%) with institutions in North America, of 3 (1.5%) in South America, of 30 (15%) in Asia, and of 7 (3.4%) in Australia.</p></sec><sec id="s3-2"><title>Readability, Factual Accuracy, Word Count, and Composite Overall Quality Assessment</title><p>Compared to the original lay summaries, those generated by ChatGPT-4 (using both simple and extended prompts) exhibited improved readability metrics, generally higher factual accuracy, and better adherence to the predefined correct word count. Consequently, a greater proportion of the ChatGPT-4 generated lay summaries met criteria for high-quality classification (ChatGPT-4 extended prompt 79%; ChatGPT-4 simple prompt 55%; original lay summary 5.4%; <italic>P</italic>&#x003C;.001). Interobserver agreement for the content assessments was substantial (&#x03BA;=0.679; <italic>P</italic>&#x003C;.001).
<xref ref-type="table" rid="table3">Tables 3</xref><xref ref-type="table" rid="table4"/>-<xref ref-type="table" rid="table5">5</xref> present a detailed description and statistical comparison of text metrics, readability scores, factual accuracy, and overall assessment across the original lay summary, the ChatGPT-4 simple prompt, and the ChatGPT-4 extended prompt. <xref ref-type="fig" rid="figure1">Figure 1</xref> displays a comparative grid plot of FKRE scores for the three lay summary versions, illustrating the higher median readability values alongside the corresponding factual accuracy scores.</p><table-wrap id="t3" position="float"><label>Table 3.</label><caption><p>Descriptive data regarding length metrics and readability scores of the original lay summaries and those generated by ChatGPT-4 (simple vs extended prompt; N=204). The highest readability performance indices are highlighted in italic.</p></caption><table id="table3" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Parameter</td><td align="left" valign="bottom">Original<break/>lay summary</td><td align="left" valign="bottom">ChatGPT-4<break/>simple<break/>prompt</td><td align="left" valign="bottom">ChatGPT-4<break/>extended prompt</td><td align="left" valign="bottom">Standardized test statistic (<italic>Z</italic> values)</td><td align="left" valign="bottom"><italic>P</italic> values</td></tr></thead><tbody><tr><td align="left" valign="top" colspan="6">Text metrics</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Sentences, median (IQR)</td><td align="left" valign="top">5 (4-7)</td><td align="left" valign="top">6 (6-7)</td><td align="left" valign="top">7 (6-7)</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>5.528<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup><sup><xref ref-type="table-fn" rid="table3fn2">b</xref></sup></p></list-item><list-item><p>4.627<sup><xref
ref-type="table-fn" rid="table3fn2">b</xref></sup><sup><xref ref-type="table-fn" rid="table3fn3">c</xref></sup></p></list-item><list-item><p>6.572<sup><xref ref-type="table-fn" rid="table3fn2">b</xref></sup><sup><xref ref-type="table-fn" rid="table3fn4">d</xref></sup></p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>&#x003C;.001<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup><sup><xref ref-type="table-fn" rid="table3fn3">c</xref></sup><sup><xref ref-type="table-fn" rid="table3fn4">d</xref></sup></p></list-item></list></td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Words, median (IQR)</td><td align="left" valign="top">117 (95&#x2010;140)</td><td align="left" valign="top">139 (129&#x2010;144)</td><td align="left" valign="top">139 (129&#x2010;145)</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>.121<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup><sup><xref ref-type="table-fn" rid="table3fn5">e</xref></sup></p></list-item><list-item><p>6.956<sup><xref ref-type="table-fn" rid="table3fn2">b</xref></sup><sup><xref ref-type="table-fn" rid="table3fn3">c</xref></sup></p></list-item><list-item><p>6.625<sup><xref ref-type="table-fn" rid="table3fn2">b</xref></sup><sup><xref ref-type="table-fn" rid="table3fn4">d</xref></sup></p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>.90<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup></p></list-item><list-item><p>&#x003C;.001<sup><xref ref-type="table-fn" rid="table3fn3">c</xref></sup><sup><xref ref-type="table-fn" rid="table3fn4">d</xref></sup></p></list-item></list></td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Complex words, median (IQR)</td><td align="left" valign="top">31 (23&#x2010;39)</td><td align="left" 
valign="top">26 (21&#x2010;30)</td><td align="left" valign="top">20 (15&#x2010;24)</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>11.619<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup><sup><xref ref-type="table-fn" rid="table3fn5">e</xref></sup></p></list-item><list-item><p>6.712<sup><xref ref-type="table-fn" rid="table3fn3">c</xref></sup><sup><xref ref-type="table-fn" rid="table3fn5">e</xref></sup></p></list-item><list-item><p>11.319<sup><xref ref-type="table-fn" rid="table3fn4">d</xref></sup><sup><xref ref-type="table-fn" rid="table3fn5">e</xref></sup></p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>&#x003C;.001<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup><sup><xref ref-type="table-fn" rid="table3fn3">c</xref></sup><sup><xref ref-type="table-fn" rid="table3fn4">d</xref></sup></p></list-item></list></td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Percent of complex words, median (IQR)</td><td align="left" valign="top">27 (23&#x2010;31)</td><td align="left" valign="top">19 (16&#x2010;22)</td><td align="left" valign="top">14 (11&#x2010;18)</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>11.818<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup><sup><xref ref-type="table-fn" rid="table3fn5">e</xref></sup></p></list-item><list-item><p>11.237<sup><xref ref-type="table-fn" rid="table3fn3">c</xref></sup><sup><xref ref-type="table-fn" rid="table3fn5">e</xref></sup></p></list-item><list-item><p>12.338<sup><xref ref-type="table-fn" rid="table3fn4">d</xref></sup><sup><xref ref-type="table-fn" rid="table3fn5">e</xref></sup></p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>&#x003C;.001<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup><sup><xref ref-type="table-fn" rid="table3fn3">c</xref></sup><sup><xref 
ref-type="table-fn" rid="table3fn4">d</xref></sup></p></list-item></list></td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Average words per sentence, median (IQR)</td><td align="left" valign="top">22 (19&#x2010;25)</td><td align="left" valign="top">22 (20&#x2010;24)</td><td align="left" valign="top">20 (19&#x2010;22)</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>6.191<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup><sup><xref ref-type="table-fn" rid="table3fn5">e</xref></sup></p></list-item><list-item><p>.894<sup><xref ref-type="table-fn" rid="table3fn3">c</xref></sup><sup><xref ref-type="table-fn" rid="table3fn5">e</xref></sup></p></list-item><list-item><p>3.790<sup><xref ref-type="table-fn" rid="table3fn4">d</xref></sup><sup><xref ref-type="table-fn" rid="table3fn5">e</xref></sup></p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>.37<sup><xref ref-type="table-fn" rid="table3fn3">c</xref></sup></p></list-item><list-item><p>&#x003C;.001<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup><sup><xref ref-type="table-fn" rid="table3fn4">d</xref></sup></p></list-item></list></td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Average syllables per word, median (IQR)</td><td align="left" valign="top">1.9 (1.9&#x2010;2.1)</td><td align="left" valign="top">1.8 (1.7&#x2010;1.9)</td><td align="left" valign="top">1.6 (1.6&#x2010;1.7)</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>11.714<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup><sup><xref ref-type="table-fn" rid="table3fn5">e</xref></sup></p></list-item><list-item><p>10.852<sup><xref ref-type="table-fn" rid="table3fn3">c</xref></sup><sup><xref ref-type="table-fn"
rid="table3fn5">e</xref></sup></p></list-item><list-item><p>12.234<sup><xref ref-type="table-fn" rid="table3fn4">d</xref></sup><sup><xref ref-type="table-fn" rid="table3fn5">e</xref></sup></p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>&#x003C;.001<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup><sup><xref ref-type="table-fn" rid="table3fn3">c</xref></sup><sup><xref ref-type="table-fn" rid="table3fn4">d</xref></sup></p></list-item></list></td></tr><tr><td align="left" valign="top" colspan="6">Readability Scores<sup><xref ref-type="table-fn" rid="table3fn6">f</xref></sup></td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>FKRE<sup><xref ref-type="table-fn" rid="table3fn7">g</xref></sup>, median (IQR)</td><td align="left" valign="top">20 (9.5&#x2010;29)</td><td align="left" valign="top">36 (29&#x2010;43)</td><td align="left" valign="top">47 (42&#x2010;56)</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>12.106<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup><sup><xref ref-type="table-fn" rid="table3fn2">b</xref></sup></p></list-item><list-item><p>10.852<sup><xref ref-type="table-fn" rid="table3fn3">c</xref></sup><sup><xref ref-type="table-fn" rid="table3fn2">b</xref></sup></p></list-item><list-item><p>12.268<sup><xref ref-type="table-fn" rid="table3fn4">d</xref></sup><sup><xref ref-type="table-fn" rid="table3fn2">b</xref></sup></p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>&#x003C;.001<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup><sup><xref ref-type="table-fn" rid="table3fn3">c</xref></sup><sup><xref ref-type="table-fn" rid="table3fn4">d</xref></sup></p></list-item></list></td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>FKGL<sup><xref ref-type="table-fn" 
rid="table3fn8">h</xref></sup>, median (IQR)</td><td align="left" valign="top">16 (14&#x2010;18)</td><td align="left" valign="top">14 (13&#x2010;15)</td><td align="left" valign="top">12 (10&#x2010;13)</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>11.693<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup><sup><xref ref-type="table-fn" rid="table3fn5">e</xref></sup></p></list-item><list-item><p>9.446<sup><xref ref-type="table-fn" rid="table3fn3">c</xref></sup><sup><xref ref-type="table-fn" rid="table3fn5">e</xref></sup></p></list-item><list-item><p>11.936<sup><xref ref-type="table-fn" rid="table3fn4">d</xref></sup><sup><xref ref-type="table-fn" rid="table3fn5">e</xref></sup></p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>&#x003C;.001<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup><sup><xref ref-type="table-fn" rid="table3fn3">c</xref></sup><sup><xref ref-type="table-fn" rid="table3fn4">d</xref></sup></p></list-item></list></td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>GFS<sup><xref ref-type="table-fn" rid="table3fn9">i</xref></sup>, median (IQR)</td><td align="left" valign="top">19 (17&#x2010;22)</td><td align="left" valign="top">16 (15&#x2010;17)</td><td align="left" valign="top">14 (12&#x2010;15)</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>11.770<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup><sup><xref ref-type="table-fn" rid="table3fn5">e</xref></sup></p></list-item><list-item><p>10.042<sup><xref ref-type="table-fn" rid="table3fn3">c</xref></sup><sup><xref ref-type="table-fn" rid="table3fn5">e</xref></sup></p></list-item><list-item><p>12.007<sup><xref ref-type="table-fn" rid="table3fn4">d</xref></sup><sup><xref ref-type="table-fn" rid="table3fn5">e</xref></sup></p></list-item></list></td><td align="left" valign="top"><list 
list-type="bullet"><list-item><p>&#x003C;.001<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup><sup><xref ref-type="table-fn" rid="table3fn3">c</xref></sup><sup><xref ref-type="table-fn" rid="table3fn4">d</xref></sup></p></list-item></list></td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>SMOG<sup><xref ref-type="table-fn" rid="table3fn10">j</xref></sup> Index, median (IQR)</td><td align="left" valign="top">14 (13&#x2010;15)</td><td align="left" valign="top">12 (11&#x2010;13)</td><td align="left" valign="top">10 (9&#x2010;11)</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>11.784<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup><sup><xref ref-type="table-fn" rid="table3fn5">e</xref></sup></p></list-item><list-item><p>10.451<sup><xref ref-type="table-fn" rid="table3fn3">c</xref></sup><sup><xref ref-type="table-fn" rid="table3fn5">e</xref></sup></p></list-item><list-item><p>12.187<sup><xref ref-type="table-fn" rid="table3fn4">d</xref></sup><sup><xref ref-type="table-fn" rid="table3fn5">e</xref></sup></p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>&#x003C;.001<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup><sup><xref ref-type="table-fn" rid="table3fn3">c</xref></sup><sup><xref ref-type="table-fn" rid="table3fn4">d</xref></sup></p></list-item></list></td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>CLI<sup><xref ref-type="table-fn" rid="table3fn11">k</xref></sup>, median (IQR)</td><td align="left" valign="top">18 (17&#x2010;20)</td><td align="left" valign="top">17 (16&#x2010;18)</td><td align="left" valign="top">15 (14&#x2010;16)</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>11.475<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup><sup><xref ref-type="table-fn" 
rid="table3fn5">e</xref></sup></p></list-item><list-item><p>4.746<sup><xref ref-type="table-fn" rid="table3fn3">c</xref></sup><sup><xref ref-type="table-fn" rid="table3fn5">e</xref></sup></p></list-item><list-item><p>10.811<sup><xref ref-type="table-fn" rid="table3fn4">d</xref></sup><sup><xref ref-type="table-fn" rid="table3fn5">e</xref></sup></p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>&#x003C;.001<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup><sup><xref ref-type="table-fn" rid="table3fn3">c</xref></sup><sup><xref ref-type="table-fn" rid="table3fn4">d</xref></sup></p></list-item></list></td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>ARI<sup><xref ref-type="table-fn" rid="table3fn12">l</xref></sup>, median (IQR)</td><td align="left" valign="top">17 (15&#x2010;19)</td><td align="left" valign="top">16 (15&#x2010;17)</td><td align="left" valign="top">14 (12&#x2010;15)</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>11.408<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup><sup><xref ref-type="table-fn" rid="table3fn5">e</xref></sup></p></list-item><list-item><p>3.759<sup><xref ref-type="table-fn" rid="table3fn3">c</xref></sup><sup><xref ref-type="table-fn" rid="table3fn5">e</xref></sup></p></list-item><list-item><p>9.641<sup><xref ref-type="table-fn" rid="table3fn4">d</xref></sup><sup><xref ref-type="table-fn" rid="table3fn5">e</xref></sup></p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>&#x003C;.001<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup><sup><xref ref-type="table-fn" rid="table3fn3">c</xref></sup><sup><xref ref-type="table-fn" rid="table3fn4">d</xref></sup></p></list-item></list></td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Reading age 
(y), median (IQR)</td><td align="left" valign="top">23 (21&#x2010;24)</td><td align="left" valign="top">21 (20&#x2010;22)</td><td align="left" valign="top">19 (17&#x2010;20)</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>11.511<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup><sup><xref ref-type="table-fn" rid="table3fn5">e</xref></sup></p></list-item><list-item><p>8.210<sup><xref ref-type="table-fn" rid="table3fn3">c</xref></sup><sup><xref ref-type="table-fn" rid="table3fn5">e</xref></sup></p></list-item><list-item><p>11.545<sup><xref ref-type="table-fn" rid="table3fn4">d</xref></sup><sup><xref ref-type="table-fn" rid="table3fn5">e</xref></sup></p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>&#x003C;.001<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup><sup><xref ref-type="table-fn" rid="table3fn3">c</xref></sup><sup><xref ref-type="table-fn" rid="table3fn4">d</xref></sup></p></list-item></list></td></tr></tbody></table><table-wrap-foot><fn id="table3fn1"><p><sup>a</sup>ChatGPT-4 simple prompt versus ChatGPT-4 extended prompt.</p></fn><fn id="table3fn2"><p><sup>b</sup>Wilcoxon signed-rank test based on negative ranks.</p></fn><fn id="table3fn3"><p><sup>c</sup>Original lay summary versus ChatGPT-4 simple prompt.</p></fn><fn id="table3fn4"><p><sup>d</sup>Original lay summary versus ChatGPT-4 extended prompt.</p></fn><fn id="table3fn5"><p><sup>e</sup>Wilcoxon signed-rank test based on positive ranks.</p></fn><fn id="table3fn6"><p><sup>f</sup>For FKRE, higher values indicate easier readability.
For all indices except FKRE, lower values indicate easier readability.</p></fn><fn id="table3fn7"><p><sup>g</sup>FKRE: Flesch-Kincaid Reading Ease.</p></fn><fn id="table3fn8"><p><sup>h</sup>FKGL: Flesch-Kincaid Grade Level.</p></fn><fn id="table3fn9"><p><sup>i</sup>GFS: Gunning Fog Score.</p></fn><fn id="table3fn10"><p><sup>j</sup>SMOG: Simple Measure of Gobbledygook.</p></fn><fn id="table3fn11"><p><sup>k</sup>CLI: Coleman-Liau Index.</p></fn><fn id="table3fn12"><p><sup>l</sup>ARI: Automated Readability Index.</p></fn></table-wrap-foot></table-wrap><table-wrap id="t4" position="float"><label>Table 4.</label><caption><p>Factual accuracy of the original lay summaries and those generated by ChatGPT-4 (simple vs extended prompt). Italic letters indicate statistical significance (N=204).</p></caption><table id="table4" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Assessment of factual accuracy, readability (FKRE<sup><xref ref-type="table-fn" rid="table4fn1">a</xref></sup>), and word count</td><td align="left" valign="bottom">Original lay summary</td><td align="left" valign="bottom">ChatGPT-4 simple prompt</td><td align="left" valign="top">ChatGPT-4 extended prompt</td><td align="left" valign="bottom">Standardized test statistic (<italic>Z</italic> values)</td><td align="left" valign="bottom"><italic>P</italic> values</td></tr></thead><tbody><tr><td align="left" valign="top">Factual accuracy score 1 (performed by MM)</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"><list list-type="bullet"><list-item><p>.626<sup><xref ref-type="table-fn" rid="table4fn2">b</xref></sup><sup><xref ref-type="table-fn" rid="table4fn6">f</xref></sup></p></list-item><list-item><p>3.994<sup><xref ref-type="table-fn" rid="table4fn3">c</xref></sup><sup><xref ref-type="table-fn" rid="table4fn5">e</xref></sup></p></list-item><list-item><p>3.631<sup><xref ref-type="table-fn" 
rid="table4fn4">d</xref></sup><sup><xref ref-type="table-fn" rid="table4fn5">e</xref></sup></p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>.53<sup><xref ref-type="table-fn" rid="table4fn2">b</xref></sup></p></list-item><list-item><p>&#x003C;.001<sup><xref ref-type="table-fn" rid="table4fn3">c</xref></sup><sup><xref ref-type="table-fn" rid="table4fn4">d</xref></sup></p></list-item></list></td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>1 point, n (%)</td><td align="left" valign="top">0 (0)</td><td align="left" valign="top">0 (0)</td><td align="left" valign="top">0 (0)</td><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>2 points, n (%)</td><td align="left" valign="top">0 (0)</td><td align="left" valign="top">0 (0)</td><td align="left" valign="top">0 (0)</td><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>3 points, n (%)</td><td align="left" valign="top">15 (7.4)</td><td align="left" valign="top">1 (0.5)</td><td align="left" valign="top">1 (0.5)</td><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>4 points, n (%)</td><td align="left" valign="top">45 (22)</td><td align="left" valign="top">35 (17)</td><td align="left" valign="top">38 (19)</td><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>5 points, n (%)</td><td align="left" valign="top">144 (71)</td><td align="left" valign="top">168 (82)</td><td 
align="left" valign="top">165 (81)</td><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Median (IQR)</td><td align="left" valign="top">5 (4-5)</td><td align="left" valign="top">5 (5-5)</td><td align="left" valign="top">5 (5-5)</td><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top">Factual accuracy score 2 (performed by JB)</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"><list list-type="bullet"><list-item><p>1.512<sup><xref ref-type="table-fn" rid="table4fn2">b</xref></sup><sup><xref ref-type="table-fn" rid="table4fn5">e</xref></sup></p></list-item><list-item><p>4.845<sup><xref ref-type="table-fn" rid="table4fn3">c</xref></sup><sup><xref ref-type="table-fn" rid="table4fn5">e</xref></sup></p></list-item><list-item><p>5.507<sup><xref ref-type="table-fn" rid="table4fn4">d</xref></sup><sup><xref ref-type="table-fn" rid="table4fn5">e</xref></sup></p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>.13<sup><xref ref-type="table-fn" rid="table4fn2">b</xref></sup></p></list-item><list-item><p>&#x003C;.001<sup><xref ref-type="table-fn" rid="table4fn3">c</xref></sup><sup><xref ref-type="table-fn" rid="table4fn4">d</xref></sup></p></list-item></list></td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>1 point, n (%)</td><td align="left" valign="top">0 (0)</td><td align="left" valign="top">0 (0)</td><td align="left" valign="top">0 (0)</td><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>2 points, n (%)</td><td align="left" valign="top">2 (1.0)</td><td align="left" 
valign="top">0 (0)</td><td align="left" valign="top">0 (0)</td><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>3 points, n (%)</td><td align="left" valign="top">20 (9.8)</td><td align="left" valign="top">4 (2.0)</td><td align="left" valign="top">2 (1.0)</td><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>4 points, n (%)</td><td align="left" valign="top">55 (27)</td><td align="left" valign="top">38 (19)</td><td align="left" valign="top">34 (17)</td><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>5 points, n (%)</td><td align="left" valign="top">127 (62)</td><td align="left" valign="top">162 (79)</td><td align="left" valign="top">168 (82)</td><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Median (IQR)</td><td align="left" valign="top">5 (4-5)</td><td align="left" valign="top">5 (5-5)</td><td align="left" valign="top">5 (5-5)</td><td align="left" valign="top"/><td align="left" valign="top"/></tr></tbody></table><table-wrap-foot><fn id="table4fn1"><p><sup>a</sup>FKRE: Flesch-Kincaid Reading Ease.</p></fn><fn id="table4fn2"><p><sup>b</sup>ChatGPT-4 simple prompt versus ChatGPT-4 extended prompt.</p></fn><fn id="table4fn3"><p><sup>c</sup>Original lay summary versus ChatGPT-4 simple prompt.</p></fn><fn id="table4fn4"><p><sup>d</sup>Original lay summary versus ChatGPT-4 extended prompt.</p></fn><fn id="table4fn5"><p><sup>e</sup>Wilcoxon signed-rank test based on negative ranks.
</p></fn><fn id="table4fn6"><p><sup>f</sup>Wilcoxon signed ranks test based on positive ranks.</p></fn></table-wrap-foot></table-wrap><table-wrap id="t5" position="float"><label>Table 5.</label><caption><p>Assessment of factual accuracy, readability (Flesch-Kincaid Reading Ease [FKRE]), and word count, leading to an overall quality assessment of the original lay summaries and those generated by ChatGPT-4 (simple vs extended prompt). Italic letters indicate statistical significance (N=204).</p></caption><table id="table5" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Parameter</td><td align="left" valign="bottom">Original lay summary</td><td align="left" valign="bottom">ChatGPT-4 simple prompt</td><td align="left" valign="bottom">ChatGPT-4 extended prompt</td><td align="left" valign="bottom">Standardized test statistic (<italic>Z</italic> values)</td><td align="left" valign="bottom"><italic>P</italic> values</td></tr></thead><tbody><tr><td align="left" valign="top">Factual accuracy scores; overall evaluation</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"><list list-type="bullet"><list-item><p>.816<sup><xref ref-type="table-fn" rid="table5fn1">a</xref></sup><sup><xref ref-type="table-fn" rid="table5fn2">b</xref></sup></p></list-item><list-item><p>3.789<sup><xref ref-type="table-fn" rid="table5fn2">b</xref></sup><sup><xref ref-type="table-fn" rid="table5fn3">c</xref></sup></p></list-item><list-item><p>3.980<sup><xref ref-type="table-fn" rid="table5fn2">b</xref></sup><sup><xref ref-type="table-fn" rid="table5fn4">d</xref></sup></p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>.41<sup><xref ref-type="table-fn" rid="table5fn1">a</xref></sup></p></list-item><list-item><p>&#x003C;.001<sup><xref ref-type="table-fn" rid="table5fn3">c</xref></sup><sup><xref ref-type="table-fn" 
rid="table5fn4">d</xref></sup></p></list-item></list></td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>1 rating &#x003C;4, n (%)</td><td align="left" valign="top">13 (6.4)</td><td align="left" valign="top">3 (1.5)</td><td align="left" valign="top">1 (0.5)</td><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>2 ratings &#x003C;4, n (%)</td><td align="left" valign="top">12 (5.9)</td><td align="left" valign="top">1 (0.5)</td><td align="left" valign="top">1 (0.5)</td><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top">FKRE</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"><list list-type="bullet"><list-item><p>7.066<sup><xref ref-type="table-fn" rid="table5fn1">a</xref></sup><sup><xref ref-type="table-fn" rid="table5fn2">b</xref></sup></p></list-item><list-item><p>9.869<sup><xref ref-type="table-fn" rid="table5fn2">b</xref></sup><sup><xref ref-type="table-fn" rid="table5fn3">c</xref></sup></p></list-item><list-item><p>11.252<sup><xref ref-type="table-fn" rid="table5fn2">b</xref></sup><sup><xref ref-type="table-fn" rid="table5fn4">d</xref></sup></p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>&#x003C;.001<sup><xref ref-type="table-fn" rid="table5fn1">a</xref></sup><sup><xref ref-type="table-fn" rid="table5fn3">c</xref></sup><sup><xref ref-type="table-fn" rid="table5fn4">d</xref></sup></p></list-item></list></td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>FKRE 29.9&#x2010;20, n (%)</td><td align="left" valign="top">59 (29)</td><td align="left" valign="top">50 (24.5)</td><td align="left" valign="top">3 
(1.5)</td><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>FKRE&#x003C;20, n (%)</td><td align="left" valign="top">99 (49)</td><td align="left" valign="top">10 (4.9)</td><td align="left" valign="top">1 (0.5)</td><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top">Wrong number of words, n (%)</td><td align="left" valign="top">140 (69)</td><td align="left" valign="top">40 (20)</td><td align="left" valign="top">39 (19)</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>.160<sup><xref ref-type="table-fn" rid="table5fn1">a</xref></sup><sup><xref ref-type="table-fn" rid="table5fn2">b</xref></sup></p></list-item><list-item><p>9.869<sup><xref ref-type="table-fn" rid="table5fn2">b</xref></sup><sup><xref ref-type="table-fn" rid="table5fn3">c</xref></sup></p></list-item><list-item><p>8.962<sup><xref ref-type="table-fn" rid="table5fn2">b</xref></sup><sup><xref ref-type="table-fn" rid="table5fn4">d</xref></sup></p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>.87<sup><xref ref-type="table-fn" rid="table5fn1">a</xref></sup></p></list-item><list-item><p>&#x003C;.001<sup><xref ref-type="table-fn" rid="table5fn3">c</xref></sup><sup><xref ref-type="table-fn" rid="table5fn4">d</xref></sup></p></list-item></list></td></tr><tr><td align="left" valign="top">Overall quality assessment</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"><list list-type="bullet"><list-item><p>5.758<sup><xref ref-type="table-fn" rid="table5fn1">a</xref></sup><sup><xref ref-type="table-fn" rid="table5fn2">b</xref></sup></p></list-item><list-item><p>11.260<sup><xref ref-type="table-fn" rid="table5fn2">b</xref></sup><sup><xref ref-type="table-fn" 
rid="table5fn3">c</xref></sup></p></list-item><list-item><p>11.741<sup><xref ref-type="table-fn" rid="table5fn2">b</xref></sup><sup><xref ref-type="table-fn" rid="table5fn4">d</xref></sup></p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>&#x003C;.001<sup><xref ref-type="table-fn" rid="table5fn1">a</xref></sup><sup><xref ref-type="table-fn" rid="table5fn3">c</xref></sup><sup><xref ref-type="table-fn" rid="table5fn4">d</xref></sup></p></list-item></list></td></tr><tr><td align="left" valign="top" colspan="4"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>High quality</td><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>0 points, n (%)</td><td align="left" valign="top">11 (5.4)</td><td align="left" valign="top">112 (55)</td><td align="left" valign="top">161 (79)</td><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Minor limitations</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>1 point, n (%)</td><td align="left" valign="top">48 (24)</td><td align="left" valign="top">72 (35)</td><td align="left" valign="top">40 (20)</td><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content 
content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>2 points, n (%)</td><td align="left" valign="top">68 (33)</td><td align="left" valign="top">17 (8.3)</td><td align="left" valign="top">2 (1)</td><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Total, n (%)</td><td align="left" valign="top">116 (57)</td><td align="left" valign="top">89 (44)</td><td align="left" valign="top">42 (21)</td><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Moderate limitations</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>3 points, n (%)</td><td align="left" valign="top">64 (31)</td><td align="left" valign="top">3 (1.5)</td><td align="left" valign="top">1 (0.5)</td><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Major limitations</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>4 points, n (%)</td><td align="left" valign="top">7 (3.4)</td><td 
align="left" valign="top">0 (0)</td><td align="left" valign="top">0 (0)</td><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>5 points, n (%)</td><td align="left" valign="top">6 (2.9)</td><td align="left" valign="top">0 (0)</td><td align="left" valign="top">0 (0)</td><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Total, n (%)</td><td align="left" valign="top">13 (6.4)</td><td align="left" valign="top">0 (0)</td><td align="left" valign="top">0 (0)</td><td align="left" valign="top"/><td align="left" valign="top"/></tr></tbody></table><table-wrap-foot><fn id="table5fn1"><p><sup>a</sup>ChatGPT-4 simple prompt versus ChatGPT-4 extended prompt.</p></fn><fn id="table5fn2"><p><sup>b</sup>Wilcoxon signed ranks test based on positive ranks.</p></fn><fn id="table5fn3"><p><sup>c</sup>Original lay summary versus ChatGPT-4 simple prompt.</p></fn><fn id="table5fn4"><p><sup>d</sup>Original lay summary versus ChatGPT-4 extended prompt.</p></fn><fn id="table5fn5"><p><sup>e</sup>Wilcoxon signed ranks test based on negative ranks.</p></fn></table-wrap-foot></table-wrap><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>Readability scores measured by the Flesch-Kincaid Reading Ease (FKRE) for the original lay summaries and for ChatGPT-4-generated summaries using simple and extended prompts. The x-axis shows the 3 summary types, and the y-axis displays FKRE values (higher scores indicate easier readability). 
Color coding represents corresponding factual accuracy scores, with higher scores reflecting better fidelity to the source text. Median values are displayed as horizontal lines. Negative FKRE values were reset to 0 for visualization to preserve interpretability of the scale. Group comparisons were performed using the Wilcoxon signed-rank test.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="jmir_v27i1e76598_fig01.png"/></fig><p>These findings were consistent across the subgroups of clinical, basic, and translational research articles. In each domain, the summaries generated by ChatGPT-4 included a higher proportion of high-quality patient summaries than the original lay summaries. This was primarily driven by improvements in readability metrics and factual accuracy. Detailed analyses are provided in Tables S1-S3 in <xref ref-type="supplementary-material" rid="app4">Multimedia Appendix 4</xref>.</p><p>Group differences among basic, clinical, and translational research articles regarding length metrics, readability scores, and factual accuracy</p></sec><sec id="s3-3"><title>Original Lay Summaries</title><p>We found that the lay summaries of translational science articles and clinical science articles contained significantly fewer words compared to those of basic science articles. Translational science lay summaries also contained fewer sentences than basic science lay summaries. Basic research lay summaries showed fewer factual inaccuracies than translational science lay summaries. Basic science lay summaries received a more favorable overall evaluation compared to those from clinical science. Apart from this, no significant differences were observed among the lay summaries of clinical, basic, or translational science articles with respect to readability metrics, text length metrics, factual accuracy, or the overall evaluation of the summaries. 
Detailed analyses are depicted in Tables S4 and S5 in <xref ref-type="supplementary-material" rid="app4">Multimedia Appendix 4</xref>.</p></sec><sec id="s3-4"><title>ChatGPT-4 Simple Prompt</title><p>Compared to clinical science and translational science lay summaries, basic science lay summaries contained fewer complex words, a lower percentage of complex words, and fewer syllables per word. They also showed significantly higher FKRE and lower FKGL scores. In addition, the reading age was lower than that of translational science lay summaries (Tables S4 and S6 in <xref ref-type="supplementary-material" rid="app4">Multimedia Appendix 4</xref>).</p></sec><sec id="s3-5"><title>ChatGPT-4 Extended Prompt</title><p>Basic science lay summaries contained fewer complex words and fewer syllables per word compared to clinical science lay summaries. They also showed higher FKRE scores, lower FKGL and Gunning Fog scores, a lower SMOG index, and a lower reading age than clinical science lay summaries. Compared to translational science lay summaries, clinical lay summaries contained more sentences and more words. Tables S4 and S7 in <xref ref-type="supplementary-material" rid="app4">Multimedia Appendix 4</xref> provide a comparative overview including detailed analyses.</p></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Principal Findings</title><p>This study provides a large-scale evaluation of ChatGPT-4&#x2019;s ability to generate lay summaries for biomedical research, using prostate cancer articles published in <italic>Cancers</italic> as a testbed. Through a direct comparison of human-written and AI-generated lay summaries across 2 prompting strategies, we assessed differences in readability, factual accuracy, and adherence to editorial guidelines. 
Findings suggest that generative AI, when properly guided, can significantly enhance the clarity and accessibility of scientific communication.</p><p>Consistent with prior work, our results confirm that many author-generated summaries exceed recommended reading levels and fail to meet readability thresholds, reflecting the difficulty of translating technical content for a general audience [<xref ref-type="bibr" rid="ref12">12</xref>-<xref ref-type="bibr" rid="ref14">14</xref>]. Domain expertise alone does not ensure clarity, as lay language writing remains an untrained skill for many scientists [<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref15">15</xref>]. Against this backdrop, our findings demonstrate that ChatGPT-4 can produce summaries with improved readability and a more coherent structure than human-written alternatives.</p><p>These findings are consistent with emerging work demonstrating how AI systems may improve both the accessibility and trustworthiness of biomedical communication. Markowitz [<xref ref-type="bibr" rid="ref22">22</xref>] highlights the broader societal potential of AI to support science communication, and &#x0160;uto Pavi&#x010D;i&#x0107; et al [<xref ref-type="bibr" rid="ref23">23</xref>] document direct improvements in readability and presentation of oncology-related lay summaries, reinforcing the practical implications of our results.</p><p>The observed performance gap between simple and extended prompts highlights the importance of prompt design. This finding is consistent with our prior study, which demonstrated that carefully tailored prompts can improve both linguistic quality and content precision in AI-generated summaries [<xref ref-type="bibr" rid="ref19">19</xref>]. 
Subgroup analysis revealed consistent domain-specific differences: basic science summaries, both human- and AI-generated, tended to use simpler language and, in some cases, contained fewer factual inaccuracies than clinical or translational summaries. This suggests that summarization performance may vary across biomedical domains, indicating a potential need for domain-adapted prompts or training and domain-sensitive quality checks in editorial workflows.</p><p>Our evaluation framework extends earlier work that focused predominantly on linguistic simplicity [<xref ref-type="bibr" rid="ref17">17</xref>,<xref ref-type="bibr" rid="ref18">18</xref>,<xref ref-type="bibr" rid="ref23">23</xref>] by integrating measures of editorial integrity, such as adherence to word count and factual accuracy, into a standardized comparison with human-authored content. Conducting the study within the editorial environment of a journal requiring lay summaries ensured assessment under realistic conditions and offers a preliminary transferable model for future implementation.</p><p>Beyond improving editorial efficiency, AI-assisted summarization may reduce variability in author performance, alleviate researchers&#x2019; workload, and promote more equitable access to knowledge, thereby supporting broader goals of patient and public engagement [<xref ref-type="bibr" rid="ref1">1</xref>-<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref29">29</xref>].</p><p>Alongside these practical benefits, the responsible use of generative AI must be guided by ethical and practical safeguards. Although ChatGPT-4 outputs showed strong quality in this study, LLMs remain vulnerable to hallucinations and lack intrinsic fact-checking mechanisms. 
Human oversight remains indispensable to ensure accuracy and ethical integrity, and concerns about reproducibility, bias, and transparency require ongoing attention [<xref ref-type="bibr" rid="ref21">21</xref>].</p><p>Editorial boards should carefully evaluate the integration of AI-assisted summarization within a structured peer-review process to ensure the integrity and trustworthiness of content delivered to the public.</p><p>Finally, our methodology operationalizes several DECIDE-AI recommendations, such as prompt standardization, performance benchmarking, and blinded evaluation. Although this study does not constitute a clinical deployment, it may serve as a preparatory model for future AI-assisted health communication tools [<xref ref-type="bibr" rid="ref24">24</xref>].</p></sec><sec id="s4-2"><title>Limitations</title><p>Several limitations merit consideration. First, this study focused exclusively on prostate cancer articles published in a single journal, which limits the generalizability of our findings to other medical disciplines or editorial ecosystems. Second, while independent experts evaluated all summaries, qualitative aspects such as tone, nuance, and audience engagement remain partially subjective, even when assessed using structured rubrics [<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref15">15</xref>]. Third, the composite quality definition using thresholds for FKRE, factual accuracy, and word count, while pragmatic, is necessarily somewhat arbitrary given the absence of consensus on minimal FKRE standards for lay summaries. Alternative thresholds could yield different classification outcomes. These parameters should therefore be regarded as exploratory benchmarks rather than universal standards. Fourth, the performance of ChatGPT-4 is specific to its current model iteration; as LLMs continue to evolve, future updates may produce different results. 
Therefore, the reproducibility and temporal consistency of AI-generated outputs warrant ongoing scrutiny. Fifth, the potential for hallucinations must be carefully considered when applying LLMs in any context. Although no evidence for such hallucinations was observed in this study&#x2019;s setting, likely due to the constrained task of generating lay summaries on the basis of article metadata, LLMs are inherently prone to these errors due to the probabilistic nature of their architecture. This limitation is particularly relevant in health care contexts and should be addressed through editorial safeguards, including expert oversight and review processes that combine automated generation with human validation.</p><p>Finally, and most importantly, this study did not include patients, caregivers, or members of the general public to evaluate comprehension, perceived clarity, or trust from the perspective of lay readers. These endpoints are critical for determining the real-world communicative effectiveness of lay summaries [<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref17">17</xref>,<xref ref-type="bibr" rid="ref22">22</xref>], highlighting an important gap given the growing emphasis on co-designed digital health communication [<xref ref-type="bibr" rid="ref1">1</xref>-<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref29">29</xref>,<xref ref-type="bibr" rid="ref30">30</xref>]. Practical approaches should follow scientifically rigorous methodological protocols. For example, blinded rating of comprehension using Likert scales or testing understanding by asking lay persons to reproduce the content of a lay summary in their own words, with meaningful operationalization of results, can help ensure validity and reproducibility. 
In addition, inclusion of lay readers could involve structured comprehension surveys, focus groups, or co-design workshops, thereby supporting the development of lay summaries that meet the informational needs and expectations of end users. Prior research indicates that users often cannot reliably distinguish AI-generated from human-authored texts [<xref ref-type="bibr" rid="ref31">31</xref>-<xref ref-type="bibr" rid="ref33">33</xref>], and the impact of labeling content as AI-generated remains unclear. Some evidence suggests that explicit AI disclosure may reduce trust [<xref ref-type="bibr" rid="ref34">34</xref>-<xref ref-type="bibr" rid="ref36">36</xref>], yet transparency is essential for ethical communication.</p><p>Future work should prioritize rigorous usability testing that incorporates feedback from lay audiences through thoughtfully designed studies. Such evaluations should go beyond assessing comprehension to also examine potential downstream effects, including improved patient knowledge, increased confidence, and enhanced shared decision-making. Such efforts will be vital to ensuring that generative AI truly enhances patient-centered communication rather than merely optimizing textual outputs.</p></sec><sec id="s4-3"><title>Conclusions</title><p>This study suggests that, when guided by carefully structured prompts, ChatGPT-4 can generate lay summaries that, within the context of prostate cancer articles and editorial requirements evaluated here, demonstrate improved readability, factual accuracy, and adherence to word count guidelines compared to human-written versions. Prompt optimization notably influences output quality, indicating a scalable approach to enhancing accessibility in scientific communication.</p><p>The broader adoption of generative AI tools in editorial workflows offers a promising opportunity to democratize knowledge, reduce variability in lay communication, and strengthen public trust in science. 
To realize these benefits responsibly, journals should consider implementing concrete measures. First, structured prompt templates could be offered to authors at submission to encourage more consistent and high-quality lay summaries. Second, all AI-assisted summaries should undergo mandatory human editorial review to ensure factual accuracy and safeguard against potential errors or omissions. Third, alignment with established health literacy and plain language frameworks is essential to guarantee accessibility across diverse readerships. Finally, publishers may also explore the development or adoption of in-house AI models to maintain institutional control, protect data privacy, and reduce dependence on external providers.</p><p>Future research should extend beyond technical evaluations to include direct user testing with diverse patient populations, integrating comprehension studies, focus groups, and co-design workshops. Such efforts will be pivotal in validating the accessibility and trustworthiness of AI-generated lay communication and in shaping evidence-based editorial policies that balance innovation with responsibility.</p></sec></sec></body><back><ack><p>Generative artificial intelligence tools were used for generating lay summaries, as detailed in the Methods section. No generative artificial intelligence tools were used in the drafting, editing, or revision of the manuscript.</p></ack><notes><sec><title>Data Availability</title><p>The original contributions presented in this study are included in the article/Supplementary Material. 
Further inquiries can be directed to the corresponding author.</p></sec></notes><fn-group><fn fn-type="con"><p>Conceptualization: MH, MM, ER</p><p>Data curation: JB, SE, MH, CK, MM, ER, AS, VS</p><p>Formal analysis: MH</p><p>Methodology: MH, MM, ER</p><p>Visualization: ER</p><p/><p>Writing&#x2014;original draft preparation: MH, MM, ER</p><p>Writing&#x2014;review and editing: ER, SE, VS, CK, AK, AS, JB, C Go&#x00DF;ler, RM, C Gilfrich, MB, DvW, HB, CW, ASM, MH, MM</p><p>All authors have read and agreed to the published version of the manuscript.</p></fn><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">AI</term><def><p>artificial intelligence</p></def></def-item><def-item><term id="abb2">DECIDE-AI</term><def><p>Developmental and Exploratory Clinical Investigations of Decision-Support Systems driven by Artificial Intelligence</p></def></def-item><def-item><term id="abb3">FKGL</term><def><p>Flesch-Kincaid Grade Level</p></def></def-item><def-item><term id="abb4">FKRE</term><def><p>Flesch-Kincaid Reading Ease</p></def></def-item><def-item><term id="abb5">LLM</term><def><p>large language model</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Pushparajah</surname><given-names>DS</given-names> </name><name name-style="western"><surname>Manning</surname><given-names>E</given-names> </name><name name-style="western"><surname>Michels</surname><given-names>E</given-names> </name><name name-style="western"><surname>Arnaudeau-B&#x00E9;gard</surname><given-names>C</given-names> </name></person-group><article-title>Value of developing plain language summaries of scientific and clinical articles: a survey of patients and physicians</article-title><source>Ther Innov Regul 
Sci</source><year>2018</year><month>07</month><volume>52</volume><issue>4</issue><fpage>474</fpage><lpage>481</lpage><pub-id pub-id-type="doi">10.1177/2168479017738723</pub-id><pub-id pub-id-type="medline">29714545</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Witteman</surname><given-names>HO</given-names> </name><name name-style="western"><surname>Chipenda Dansokho</surname><given-names>S</given-names> </name><name name-style="western"><surname>Colquhoun</surname><given-names>H</given-names> </name><etal/></person-group><article-title>Twelve lessons learned for effective research partnerships between patients, caregivers, clinicians, academic researchers, and other stakeholders</article-title><source>J Gen Intern Med</source><year>2018</year><month>04</month><volume>33</volume><issue>4</issue><fpage>558</fpage><lpage>562</lpage><pub-id pub-id-type="doi">10.1007/s11606-017-4269-6</pub-id><pub-id pub-id-type="medline">29327211</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Banner</surname><given-names>D</given-names> </name><name name-style="western"><surname>Bains</surname><given-names>M</given-names> </name><name name-style="western"><surname>Carroll</surname><given-names>S</given-names> </name><etal/></person-group><article-title>Patient and public engagement in integrated knowledge translation research: Are we there yet?</article-title><source>Res Involv Engagem</source><year>2019</year><volume>5</volume><issue>8</issue><fpage>8</fpage><pub-id pub-id-type="doi">10.1186/s40900-019-0139-1</pub-id><pub-id pub-id-type="medline">30805202</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Sheridan</surname><given-names>S</given-names> </name><name name-style="western"><surname>Schrandt</surname><given-names>S</given-names> </name><name name-style="western"><surname>Forsythe</surname><given-names>L</given-names> </name><name name-style="western"><surname>Hilliard</surname><given-names>TS</given-names> </name><name name-style="western"><surname>Paez</surname><given-names>KA</given-names> </name><collab>Advisory Panel on Patient Engagement (2013 inaugural panel)</collab></person-group><article-title>The PCORI Engagement Rubric: Promising practices for partnering in research</article-title><source>Ann Fam Med</source><year>2017</year><month>03</month><volume>15</volume><issue>2</issue><fpage>165</fpage><lpage>170</lpage><pub-id pub-id-type="doi">10.1370/afm.2042</pub-id><pub-id pub-id-type="medline">28289118</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Brett</surname><given-names>J</given-names> </name><name name-style="western"><surname>Staniszewska</surname><given-names>S</given-names> </name><name name-style="western"><surname>Mockford</surname><given-names>C</given-names> </name><etal/></person-group><article-title>Mapping the impact of patient and public involvement on health and social care research: a systematic review</article-title><source>Health Expect</source><year>2014</year><month>10</month><volume>17</volume><issue>5</issue><fpage>637</fpage><lpage>650</lpage><pub-id pub-id-type="doi">10.1111/j.1369-7625.2012.00795.x</pub-id><pub-id pub-id-type="medline">22809132</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Petrini</surname><given-names>C</given-names> </name></person-group><article-title>Regulation (EU) No 536/2014 on clinical trials on medicinal 
products for human use: an overview</article-title><source>Ann Ist Super Sanita</source><year>2014</year><volume>50</volume><issue>4</issue><fpage>317</fpage><lpage>321</lpage><pub-id pub-id-type="doi">10.4415/ANN_14_04_04</pub-id><pub-id pub-id-type="medline">25522070</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="report"><article-title>Summaries of clinical trial results for laypersons. Recommendations of the expert group on clinical trials for the implementation of Regulation (EU) No 536/2014 on clinical trials on medicinal products for human use</article-title><year>2021</year><access-date>2025-11-06</access-date><publisher-name>European Commission</publisher-name><comment><ext-link ext-link-type="uri" xlink:href="https://health.ec.europa.eu/system/files/2020-02/2017_01_26_summaries_of_ct_results_for_laypersons_0.pdf">https://health.ec.europa.eu/system/files/2020-02/2017_01_26_summaries_of_ct_results_for_laypersons_0.pdf</ext-link></comment></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="web"><article-title>Cancers: instructions for authors</article-title><source>Multidisciplinary Digital Publishing Institute</source><access-date>2025-02-19</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.mdpi.com/journal/cancers/instructions">https://www.mdpi.com/journal/cancers/instructions</ext-link></comment></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="web"><article-title>Guide for authors</article-title><source>European Urology</source><access-date>2025-02-19</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.europeanurology.com/guide-for-authors">https://www.europeanurology.com/guide-for-authors</ext-link></comment></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Kirkpatrick</surname><given-names>E</given-names> </name><name name-style="western"><surname>Gaisford</surname><given-names>W</given-names> </name><name name-style="western"><surname>Williams</surname><given-names>E</given-names> </name><name name-style="western"><surname>Brindley</surname><given-names>E</given-names> </name><name name-style="western"><surname>Tembo</surname><given-names>D</given-names> </name><name name-style="western"><surname>Wright</surname><given-names>D</given-names> </name></person-group><article-title>Understanding Plain English summaries. A comparison of two approaches to improve the quality of Plain English summaries in research reports</article-title><source>Res Involv Engagem</source><year>2017</year><volume>3</volume><issue>1</issue><fpage>17</fpage><pub-id pub-id-type="doi">10.1186/s40900-017-0064-0</pub-id><pub-id pub-id-type="medline">29062542</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gainey</surname><given-names>KM</given-names> </name><name name-style="western"><surname>Smith</surname><given-names>J</given-names> </name><name name-style="western"><surname>McCaffery</surname><given-names>KJ</given-names> </name><name name-style="western"><surname>Clifford</surname><given-names>S</given-names> </name><name name-style="western"><surname>Muscat</surname><given-names>DM</given-names> </name></person-group><article-title>What author instructions do health journals provide for writing plain language summaries? 
A scoping review</article-title><source>Patient</source><year>2023</year><month>01</month><volume>16</volume><issue>1</issue><fpage>31</fpage><lpage>42</lpage><pub-id pub-id-type="doi">10.1007/s40271-022-00606-7</pub-id><pub-id pub-id-type="medline">36301440</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hamnes</surname><given-names>B</given-names> </name><name name-style="western"><surname>van Eijk-Hustings</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Primdahl</surname><given-names>J</given-names> </name></person-group><article-title>Readability of patient information and consent documents in rheumatological studies</article-title><source>BMC Med Ethics</source><year>2016</year><month>07</month><day>16</day><volume>17</volume><issue>1</issue><fpage>42</fpage><pub-id pub-id-type="doi">10.1186/s12910-016-0126-0</pub-id><pub-id pub-id-type="medline">27422433</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ganjavi</surname><given-names>C</given-names> </name><name name-style="western"><surname>Eppler</surname><given-names>MB</given-names> </name><name name-style="western"><surname>Ramacciotti</surname><given-names>LS</given-names> </name><name name-style="western"><surname>Cacciamani</surname><given-names>GE</given-names> </name></person-group><article-title>Clinical patient summaries not fit for purpose: a study in urology</article-title><source>Eur Urol Focus</source><year>2023</year><month>11</month><volume>9</volume><issue>6</issue><fpage>1068</fpage><lpage>1071</lpage><pub-id pub-id-type="doi">10.1016/j.euf.2023.06.003</pub-id><pub-id pub-id-type="medline">37349181</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation 
citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Shiely</surname><given-names>F</given-names> </name><name name-style="western"><surname>Daly</surname><given-names>A</given-names> </name></person-group><article-title>Trial lay summaries were not fit for purpose</article-title><source>J Clin Epidemiol</source><year>2023</year><month>04</month><volume>156</volume><fpage>105</fpage><lpage>112</lpage><pub-id pub-id-type="doi">10.1016/j.jclinepi.2023.02.023</pub-id><pub-id pub-id-type="medline">36868328</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Graham</surname><given-names>S</given-names> </name><name name-style="western"><surname>Brookey</surname><given-names>J</given-names> </name></person-group><article-title>Do patients understand?</article-title><source>Perm J</source><year>2008</year><volume>12</volume><issue>3</issue><fpage>67</fpage><lpage>69</lpage><pub-id pub-id-type="doi">10.7812/TPP/07-144</pub-id><pub-id pub-id-type="medline">21331214</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Goldsack</surname><given-names>T</given-names> </name><name name-style="western"><surname>Scarton</surname><given-names>C</given-names> </name><name name-style="western"><surname>Shardlow</surname><given-names>M</given-names> </name><name name-style="western"><surname>Lin</surname><given-names>C</given-names> </name></person-group><article-title>Overview of the BioLaySumm 2024 shared task on the lay summarization of biomedical research articles</article-title><access-date>2025-11-06</access-date><conf-name>Proceedings of the 23rd Workshop on Biomedical Natural Language Processing</conf-name><conf-date>Aug 16, 2024</conf-date><comment><ext-link 
ext-link-type="uri" xlink:href="https://aclanthology.org/2024.bionlp-1">https://aclanthology.org/2024.bionlp-1</ext-link></comment><pub-id pub-id-type="doi">10.18653/v1/2024.bionlp-1.10</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Shyr</surname><given-names>C</given-names> </name><name name-style="western"><surname>Grout</surname><given-names>RW</given-names> </name><name name-style="western"><surname>Kennedy</surname><given-names>N</given-names> </name><etal/></person-group><article-title>Leveraging artificial intelligence to summarize abstracts in lay language for increasing research accessibility and transparency</article-title><source>J Am Med Inform Assoc</source><year>2024</year><month>10</month><day>1</day><volume>31</volume><issue>10</issue><fpage>2294</fpage><lpage>2303</lpage><pub-id pub-id-type="doi">10.1093/jamia/ocae186</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Eppler</surname><given-names>MB</given-names> </name><name name-style="western"><surname>Ganjavi</surname><given-names>C</given-names> </name><name name-style="western"><surname>Knudsen</surname><given-names>JE</given-names> </name><etal/></person-group><article-title>Bridging the gap between urological research and patient understanding: the role of large language models in automated generation of layperson&#x2019;s summaries</article-title><source>Urol Pract</source><year>2023</year><month>09</month><volume>10</volume><issue>5</issue><fpage>436</fpage><lpage>443</lpage><pub-id pub-id-type="doi">10.1097/UPJ.0000000000000428</pub-id><pub-id pub-id-type="medline">37410015</pub-id></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Rinderknecht</surname><given-names>E</given-names> </name><name name-style="western"><surname>Schmelzer</surname><given-names>A</given-names> </name><name name-style="western"><surname>Kravchuk</surname><given-names>A</given-names> </name><etal/></person-group><article-title>Leveraging large language models for high-quality lay summaries: efficacy of ChatGPT-4 with custom prompts in a consecutive series of prostate cancer manuscripts</article-title><source>Curr Oncol</source><year>2025</year><month>02</month><day>11</day><volume>32</volume><issue>2</issue><fpage>102</fpage><pub-id pub-id-type="doi">10.3390/curroncol32020102</pub-id><pub-id pub-id-type="medline">39996902</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Yang</surname><given-names>X</given-names> </name><name name-style="western"><surname>Xiao</surname><given-names>Y</given-names> </name><etal/></person-group><article-title>Enhancing doctor-patient communication using large language models for pathology report interpretation</article-title><source>BMC Med Inform Decis Mak</source><year>2025</year><month>01</month><day>23</day><volume>25</volume><issue>1</issue><fpage>36</fpage><pub-id pub-id-type="doi">10.1186/s12911-024-02838-z</pub-id><pub-id pub-id-type="medline">39849504</pub-id></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Yang</surname><given-names>X</given-names> </name><name name-style="western"><surname>Xiao</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Liu</surname><given-names>D</given-names> </name><etal/></person-group><article-title>Enhancing physician-patient communication in oncology using GPT-4 through simplified radiology reports: multicenter quantitative 
study</article-title><source>J Med Internet Res</source><year>2025</year><month>04</month><day>17</day><volume>27</volume><fpage>e63786</fpage><pub-id pub-id-type="doi">10.2196/63786</pub-id><pub-id pub-id-type="medline">40245397</pub-id></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Markowitz</surname><given-names>DM</given-names> </name></person-group><article-title>From complexity to clarity: How AI enhances perceptions of scientists and the public&#x2019;s understanding of science</article-title><source>PNAS Nexus</source><year>2024</year><month>09</month><volume>3</volume><issue>9</issue><fpage>pgae387</fpage><pub-id pub-id-type="doi">10.1093/pnasnexus/pgae387</pub-id><pub-id pub-id-type="medline">39290437</pub-id></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>&#x0160;uto Pavi&#x010D;i&#x0107;</surname><given-names>J</given-names> </name><name name-style="western"><surname>Maru&#x0161;i&#x0107;</surname><given-names>A</given-names> </name><name name-style="western"><surname>Buljan</surname><given-names>I</given-names> </name></person-group><article-title>Using ChatGPT to improve the presentation of plain language summaries of Cochrane systematic reviews about oncology interventions: cross-sectional study</article-title><source>JMIR Cancer</source><year>2025</year><month>03</month><day>19</day><volume>11</volume><fpage>e63347</fpage><pub-id pub-id-type="doi">10.2196/63347</pub-id><pub-id pub-id-type="medline">40106236</pub-id></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Vasey</surname><given-names>B</given-names> </name><name 
name-style="western"><surname>Nagendran</surname><given-names>M</given-names> </name><name name-style="western"><surname>Campbell</surname><given-names>B</given-names> </name><etal/></person-group><article-title>Reporting guideline for the early stage clinical evaluation of decision support systems driven by artificial intelligence: DECIDE-AI</article-title><source>BMJ</source><year>2022</year><month>05</month><day>18</day><volume>377</volume><fpage>e070904</fpage><pub-id pub-id-type="doi">10.1136/bmj-2022-070904</pub-id><pub-id pub-id-type="medline">35584845</pub-id></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="web"><article-title>Readability test</article-title><source>WebFX</source><year>2025</year><access-date>2025-03-31</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.webfx.com/tools/read-able/">https://www.webfx.com/tools/read-able/</ext-link></comment></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Flesch</surname><given-names>R</given-names> </name></person-group><article-title>A new readability yardstick</article-title><source>J Appl Psychol</source><year>1948</year><month>06</month><volume>32</volume><issue>3</issue><fpage>221</fpage><lpage>233</lpage><pub-id pub-id-type="doi">10.1037/h0057532</pub-id><pub-id pub-id-type="medline">18867058</pub-id></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="web"><person-group person-group-type="author"><name name-style="western"><surname>DuBay</surname><given-names>WH</given-names> </name></person-group><article-title>The principles of readability</article-title><source>Education Resources Information Center</source><year>2004</year><access-date>2025-11-06</access-date><comment><ext-link ext-link-type="uri" 
xlink:href="https://eric.ed.gov/?id=ed490073">https://eric.ed.gov/?id=ed490073</ext-link></comment></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Brega</surname><given-names>AG</given-names> </name><name name-style="western"><surname>Barnard</surname><given-names>J</given-names> </name><name name-style="western"><surname>Mabachi</surname><given-names>NM</given-names> </name><etal/></person-group><source>AHRQ Health Literacy Universal Precautions Toolkit</source><year>2015</year><access-date>2025-11-06</access-date><edition>2</edition><publisher-name>Agency for Healthcare Research and Quality</publisher-name><comment><ext-link ext-link-type="uri" xlink:href="https://www.ahrq.gov/sites/default/files/publications/files/healthlittoolkit2_3.pdf">https://www.ahrq.gov/sites/default/files/publications/files/healthlittoolkit2_3.pdf</ext-link></comment></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="book"><person-group person-group-type="author"><collab>World Health Organization</collab></person-group><source>Global Strategy on Digital Health 2020-2025</source><year>2021</year><edition>1</edition><publisher-name>World Health Organization</publisher-name><pub-id pub-id-type="doi">10.1007/978-3-030-05325-3_125-1</pub-id><pub-id pub-id-type="other">9789240020924</pub-id></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Conard</surname><given-names>S</given-names> </name></person-group><article-title>Best practices in digital health literacy</article-title><source>Int J Cardiol</source><year>2019</year><month>10</month><day>1</day><volume>292</volume><fpage>277</fpage><lpage>279</lpage><pub-id pub-id-type="doi">10.1016/j.ijcard.2019.05.070</pub-id><pub-id 
pub-id-type="medline">31230937</pub-id></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Clark</surname><given-names>E</given-names> </name><name name-style="western"><surname>August</surname><given-names>T</given-names> </name><name name-style="western"><surname>Serrano</surname><given-names>S</given-names> </name><name name-style="western"><surname>Haduong</surname><given-names>N</given-names> </name><name name-style="western"><surname>Gururangan</surname><given-names>S</given-names> </name><name name-style="western"><surname>Smith</surname><given-names>NA</given-names> </name></person-group><article-title>All that&#x2019;s &#x2018;human&#x2019; is not gold: evaluating human evaluation of generated text</article-title><year>2021</year><conf-name>Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 1: Long Papers)</conf-name><comment><ext-link ext-link-type="uri" xlink:href="https://aclanthology.org/2021.acl-long">https://aclanthology.org/2021.acl-long</ext-link></comment><pub-id pub-id-type="doi">10.18653/v1/2021.acl-long.565</pub-id></nlm-citation></ref><ref id="ref32"><label>32</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Jakesch</surname><given-names>M</given-names> </name><name name-style="western"><surname>Hancock</surname><given-names>JT</given-names> </name><name name-style="western"><surname>Naaman</surname><given-names>M</given-names> </name></person-group><article-title>Human heuristics for AI-generated language are flawed</article-title><source>Proc Natl Acad Sci U S A</source><year>2023</year><month>03</month><day>14</day><volume>120</volume><issue>11</issue><fpage>e2208839120</fpage><pub-id pub-id-type="doi">10.1073/pnas.2208839120</pub-id><pub-id 
pub-id-type="medline">36881628</pub-id></nlm-citation></ref><ref id="ref33"><label>33</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wu</surname><given-names>J</given-names> </name><name name-style="western"><surname>Yang</surname><given-names>S</given-names> </name><name name-style="western"><surname>Zhan</surname><given-names>R</given-names> </name><name name-style="western"><surname>Yuan</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Chao</surname><given-names>LS</given-names> </name><name name-style="western"><surname>Wong</surname><given-names>DF</given-names> </name></person-group><article-title>A survey on LLM-generated text detection: necessity, methods, and future directions</article-title><source>Comput Linguist Assoc Comput Linguist</source><year>2025</year><month>03</month><day>15</day><volume>51</volume><issue>1</issue><fpage>275</fpage><lpage>338</lpage><pub-id pub-id-type="doi">10.1162/coli_a_00549</pub-id></nlm-citation></ref><ref id="ref34"><label>34</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wittenberg</surname><given-names>C</given-names> </name><name name-style="western"><surname>Epstein</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Berinsky</surname><given-names>AJ</given-names> </name><name name-style="western"><surname>Rand</surname><given-names>DG</given-names> </name></person-group><article-title>Labeling AI-generated content: Promises, perils, and future directions</article-title><source>MIT Explor Gener AI</source><year>2024</year><comment><ext-link ext-link-type="uri" xlink:href="https://mit-genai.pubpub.org/novel-chemicals-to-opera">https://mit-genai.pubpub.org/novel-chemicals-to-opera</ext-link></comment><pub-id pub-id-type="doi">10.21428/e4baedd9.0319e3a6</pub-id></nlm-citation></ref><ref 
id="ref35"><label>35</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wittenberg</surname><given-names>C</given-names> </name><name name-style="western"><surname>Epstein</surname><given-names>Z</given-names> </name><name name-style="western"><surname>P&#x00E9;loquin-Skulski</surname><given-names>G</given-names> </name><name name-style="western"><surname>Berinsky</surname><given-names>AJ</given-names> </name><name name-style="western"><surname>Rand</surname><given-names>DG</given-names> </name></person-group><article-title>Labeling AI-generated media online</article-title><source>PNAS Nexus</source><year>2025</year><month>06</month><volume>4</volume><issue>6</issue><fpage>pgaf170</fpage><pub-id pub-id-type="doi">10.1093/pnasnexus/pgaf170</pub-id><pub-id pub-id-type="medline">40519990</pub-id></nlm-citation></ref><ref id="ref36"><label>36</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Altay</surname><given-names>S</given-names> </name><name name-style="western"><surname>Gilardi</surname><given-names>F</given-names> </name></person-group><article-title>People are skeptical of headlines labeled as AI-generated, even if true or human-made, because they assume full AI automation</article-title><source>PNAS Nexus</source><year>2024</year><month>10</month><volume>3</volume><issue>10</issue><fpage>pgae403</fpage><pub-id pub-id-type="doi">10.1093/pnasnexus/pgae403</pub-id><pub-id pub-id-type="medline">39359399</pub-id></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>Detailed information on prompt development.</p><media xlink:href="jmir_v27i1e76598_app1.pdf" xlink:title="PDF File, 171 KB"/></supplementary-material><supplementary-material id="app2"><label>Multimedia Appendix 2</label><p>Examples of lay summaries with their corresponding factual accuracy ratings and 
explanations for the assigned scores.</p><media xlink:href="jmir_v27i1e76598_app2.pdf" xlink:title="PDF File, 218 KB"/></supplementary-material><supplementary-material id="app3"><label>Multimedia Appendix 3</label><p>Details of included articles, abstracts, keywords, and original and ChatGPT-4-generated lay summaries with readability metrics, word counts, and comprehension scores.</p><media xlink:href="jmir_v27i1e76598_app3.pdf" xlink:title="PDF File, 6906 KB"/></supplementary-material><supplementary-material id="app4"><label>Multimedia Appendix 4</label><p>Evaluation of lay summaries stratified into clinical, basic, and translational research.</p><media xlink:href="jmir_v27i1e76598_app4.pdf" xlink:title="PDF File, 398 KB"/></supplementary-material><supplementary-material id="app5"><label>Checklist 1</label><p>DECIDE-AI checklist.</p><media xlink:href="jmir_v27i1e76598_app5.pdf" xlink:title="PDF File, 358 KB"/></supplementary-material></app-group></back></article>