<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id><journal-id journal-id-type="publisher-id">jmir</journal-id><journal-id journal-id-type="index">1</journal-id><journal-title>Journal of Medical Internet Research</journal-title><abbrev-journal-title>J Med Internet Res</abbrev-journal-title><issn pub-type="epub">1438-8871</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v28i1e94234</article-id><article-id pub-id-type="doi">10.2196/94234</article-id><article-categories><subj-group subj-group-type="heading"><subject>Viewpoint</subject></subj-group></article-categories><title-group><article-title>Backcasting the Trust Gap: A Strategic Road Map for Clinician Adoption of AI Diagnostics by 2040</article-title></title-group><contrib-group><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Yu</surname><given-names>Yunguo</given-names></name><degrees>MD, PhD</degrees><xref ref-type="aff" rid="aff1"/></contrib></contrib-group><aff id="aff1"><institution>Zyter|TruCare</institution><addr-line>2600 Tower Oaks Blvd, Suite 700</addr-line><addr-line>Rockville</addr-line><addr-line>MD</addr-line><country>United States</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Mesko</surname><given-names>Bertalan</given-names></name></contrib><contrib contrib-type="editor"><name name-style="western"><surname>Krist&#x00F3;f</surname><given-names>Tam&#x00E1;s</given-names></name></contrib></contrib-group><contrib-group><contrib 
contrib-type="reviewer"><name name-style="western"><surname>Manne</surname><given-names>Tirumala Ashish Kumar</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Patil</surname><given-names>Vikas</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Yunguo Yu, MD, PhD, Zyter|TruCare, 2600 Tower Oaks Blvd, Suite 700, Rockville, MD, 20852, United States, 1 6177801330; <email>yuyunguo@gmail.com</email></corresp></author-notes><pub-date pub-type="collection"><year>2026</year></pub-date><pub-date pub-type="epub"><day>30</day><month>4</month><year>2026</year></pub-date><volume>28</volume><elocation-id>e94234</elocation-id><history><date date-type="received"><day>26</day><month>02</month><year>2026</year></date><date date-type="rev-recd"><day>08</day><month>04</month><year>2026</year></date><date date-type="accepted"><day>10</day><month>04</month><year>2026</year></date></history><copyright-statement>&#x00A9; Yunguo Yu. Originally published in the Journal of Medical Internet Research (<ext-link ext-link-type="uri" xlink:href="https://www.jmir.org">https://www.jmir.org</ext-link>), 30.4.2026. </copyright-statement><copyright-year>2026</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research (ISSN 1438-8871), is properly cited. 
The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://www.jmir.org/">https://www.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://www.jmir.org/2026/1/e94234"/><abstract><p>The integration of artificial intelligence (AI) into clinical medicine presents a persistent paradox: diagnostic models routinely demonstrate benchmark superiority over human experts, yet bedside adoption remains fragile, and clinician trust is low. Conventional forecasting approaches&#x2014;projecting model performance along optimistic trend lines&#x2014;are epistemologically insufficient because they cannot account for the nonlinear sociotechnical transitions that separate technical capability from institutional trust. This Viewpoint applies backcasting, a normative futures methodology with a 4-decade evidence base in energy policy and public governance, to the specific challenge of clinician adoption of AI diagnostics, with the aim of identifying the structural interventions required to achieve durable trust by 2040. Consistent with the tradition of single-expert normative foresight analysis, we applied backcasting as a structured reasoning framework using a STEEP (social, technological, economic, environmental, and political) analysis. Sources from PubMed, IEEE Xplore, Google Scholar, and policy repositories (the US Food and Drug Administration, World Health Organization, Organisation for Economic Co-Operation and Development, and European Commission) published between 2010 and 2025 were reviewed; barriers and enablers were coded across STEEP dimensions to identify pivot points representing convergent, time-bound structural changes. 
Working backward from a defined 2040 vision state&#x2014;a health care ecosystem with risk-stratified clinician trust thresholds, semantic transparency of AI outputs, integrated AI governance, and futures literacy in medical education&#x2014;we identified three temporal pivot points: (1) the 2030 standardization of dual-process AI architectures, in which large language models are verified in real time by locally deployed small language models, producing a calibrated confidence score; (2) the 2035 institutionalization of agentic AI orchestration governed by a formally designated chief AI officer; and (3) the 2040 integration of futures literacy and human-AI teaming competencies into standard medical curricula. The AI trust gap is an institutional design problem, not a technical inevitability. Backcasting reframes the central question from &#x201C;when will AI be ready for medicine?&#x201D; to &#x201C;what must we build to make medicine ready for AI?&#x201D; The 3 pivot points identified here&#x2014;verifiable AI by 2030, agentic governance by 2035, and futures literacy by 2040&#x2014;are structural commitments that clinicians, health system leaders, and policymakers can begin building today.</p></abstract><kwd-group><kwd>medical futures studies</kwd><kwd>backcasting</kwd><kwd>trustworthy artificial intelligence</kwd><kwd>trustworthy AI</kwd><kwd>small language models</kwd><kwd>confidence calibration</kwd><kwd>clinical decision support</kwd><kwd>futures literacy</kwd><kwd>health policy</kwd><kwd>implementation science</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><sec id="s1-1"><title>Background</title><p>The clinical deployment of artificial intelligence (AI)&#x2013;based diagnostic tools has produced a striking asymmetry: abundant proof-of-concept demonstrations coexist with a persistent deficit of durable bedside adoption. 
Large language models (LLMs) and deep learning classifiers regularly achieve expert-level performance on curated benchmark datasets, yet surveys of practicing clinicians consistently reveal skepticism, reluctance, and what has been termed &#x201C;automation resistance&#x201D; [<xref ref-type="bibr" rid="ref1">1</xref>]. This is not primarily a technological problem. The bottleneck lies in the sociotechnical substrate: the transparency of AI outputs, the governance structures that certify their safety, the training programs that build clinical fluency, and&#x2014;above all&#x2014;the absence of a structured institutional pathway from pilot to practice.</p><p>The dominant response to this challenge has been <italic>forecasting</italic>: projecting from current trends in model accuracy, regulatory approval timelines, and computational costs to estimate when &#x201C;AI will be ready.&#x201D; This approach is epistemologically inadequate. Forecasting assumes that current trajectories are sufficient conditions for a desired outcome, ignoring the nonlinear policy, cultural, and organizational shifts that are necessary preconditions for deep integration. In complex adaptive systems such as health care, desired futures do not emerge spontaneously from optimistic trend lines; they must be actively constructed through deliberate intervention.</p><p>Medical futures studies, a nascent subfield identified and characterized by Mesk&#x00F3; et al [<xref ref-type="bibr" rid="ref2">2</xref>], offers a rigorous alternative. A 2024 scoping review of health care foresight found that only 8 of more than 50 established futures methods are currently applied in health care [<xref ref-type="bibr" rid="ref2">2</xref>]. 
This represents a critical underutilization of the foresight evidence base, which has been routinely applied in economics, national defense, and environmental policy for over 4 decades [<xref ref-type="bibr" rid="ref3">3</xref>].</p><p>This Viewpoint applies <italic>backcasting</italic> [<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref5">5</xref>] to the specific challenge of clinician AI adoption.</p><p>Rather than asking &#x201C;what will happen?&#x201D; backcasting asks &#x201C;given a desired future, what must happen to get there?&#x201D; This reframing is essential: the goal is not to predict when AI will be trusted, but to identify the precise structural interventions that will make it trustworthy.</p><p>The components of this road map&#x2014;verification architectures, governance roles, and human-AI teaming (HAT) frameworks&#x2014;each have precedents in adjacent AI governance and implementation science literature. The contribution of this Viewpoint is not the individual components in isolation, but rather three integrated advances: (1) the use of backcasting as a unifying methodological framework applied specifically to clinician AI adoption, thereby addressing 4 of Dreborg&#x2019;s [<xref ref-type="bibr" rid="ref5">5</xref>] canonical conditions for preferring backcasting over forecasting; (2) the identification of 3 pivot points as a dependencies-ordered causal sequence, in which each pivot is a necessary precondition for the next; and (3) the synthesis of dual-process verification, chief AI officer (CAIO) governance, and futures literacy as interlocking necessary conditions rather than parallel independent recommendations.</p><p>The aim of this Viewpoint is to apply backcasting as a structured foresight methodology to identify the minimum set of structural interventions required to achieve durable clinician trust in AI diagnostics by 2040.</p></sec><sec 
id="s1-2"><title>Terminology</title><p>Throughout this Viewpoint, trust denotes calibrated clinician reliance on AI-generated outputs, as measured by validated automation trust instruments; confidence denotes a model-reported posterior probability estimate; adoption denotes durable clinical use beyond the pilot phase; automation resistance denotes persistent nonuse of AI tools despite demonstrated utility; and semantic transparency denotes the property whereby AI-generated outputs are natively linked to verifiable, computable evidence sources.</p></sec></sec><sec id="s2"><title>Why Backcasting Is the Appropriate Method</title><p>Backcasting was first formalized in energy policy analysis by Robinson [<xref ref-type="bibr" rid="ref4">4</xref>] as a method for navigating problems where present trends are insufficient, or actively counterproductive, for achieving long-term goals. Dreborg&#x2019;s [<xref ref-type="bibr" rid="ref5">5</xref>] canonical definition identifies 4 conditions under which backcasting is the preferred approach over forecasting: the problem is serious, conventional trends are part of the problem, a long time horizon is needed, and dominant interests are implicated. All four conditions apply to the AI trust crisis in medicine:</p><list list-type="order"><list-item><p>Seriousness&#x2014;fragmented AI adoption risks entrenching &#x201C;pilot-phase perpetuity,&#x201D; a state in which clinical AI tools are perpetually evaluated but never institutionalized, consuming resources without improving outcomes.</p></list-item><list-item><p>Current trends as part of the problem&#x2014;the prevailing trajectory, releasing increasingly powerful LLMs into clinical contexts without any verification infrastructure, increases the rate of hallucination-driven errors, actively eroding the trust that adoption requires [<xref ref-type="bibr" rid="ref6">6</xref>]. 
Independent benchmarking of LLMs on clinical tasks has corroborated this concern, documenting systematic calibration failures and overconfident outputs across specialties [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref8">8</xref>].</p></list-item><list-item><p>Long time horizon&#x2014;institutional change in health care (ie, medical education reform, regulatory framework development, and governance role creation) operates on decade-scale timelines, not product release cycles.</p></list-item><list-item><p>Dominant interests&#x2014;technology vendors, health systems, payers, and regulators each hold competing stakes in the current pilot-phase status quo. Only a normative vision of a desired future can align these interests around a common structural agenda.</p></list-item></list><p>Backcasting therefore offers a particularly appropriate methodological lens for addressing the AI trust gap in medicine&#x2014;one aligned with each of the conditions under which Dreborg [<xref ref-type="bibr" rid="ref5">5</xref>] argues that forecasting alone is insufficient.</p></sec><sec id="s3"><title>Methodological Execution in This Viewpoint</title><p>Consistent with the tradition of single-expert normative foresight analysis, this Viewpoint applies backcasting as a structured reasoning framework rather than as a fully participatory design process. 
The pivot points were identified through a literature-grounded synthesis executed as follows:</p><list list-type="bullet"><list-item><p>Sources and timeframe&#x2014;we searched PubMed, IEEE Xplore, Google Scholar, and policy repositories (the US Food and Drug Administration [FDA], World Health Organization, Organisation for Economic Co-Operation and Development, and European Commission) for publications from 2010 to 2025.</p></list-item><list-item><p>Search terms&#x2014;search terms included clinical AI adoption, LLM hallucination health care, confidence calibration clinical decision support, AI governance health systems, CAIO, Futures Literacy medical education, and HAT.</p></list-item><list-item><p>Social, technological, economic, environmental, and political (STEEP) operationalization&#x2014;each identified barrier or enabler was coded to &#x2265;1 STEEP dimensions; the 3 pivot points represent the convergence of barriers appearing across multiple dimensions within a shared time horizon.</p></list-item><list-item><p>Prioritization&#x2014;no formal Delphi or consensus technique was applied; selection reflects single-expert synthesis and is therefore subject to the perspective constraints acknowledged below.</p></list-item></list><p>We acknowledge that formal backcasting, as originally conceived by Robinson [<xref ref-type="bibr" rid="ref4">4</xref>] and Dreborg [<xref ref-type="bibr" rid="ref5">5</xref>], typically involves stakeholder cocreation, Delphi consensus, and scenario matrix comparison; those components are outside the scope of this Viewpoint and represent a natural next phase of this research agenda. 
The vision state, pivot points, and threshold specifications reported here should therefore be interpreted as normative hypotheses requiring multistakeholder validation, not as empirically confirmed targets.</p></sec><sec id="s4"><title>Defining the 2040 Vision State</title><p>The normative foundation of any backcasting exercise is the <italic>vision state</italic>: a precisely specified description of the future toward which the road map is oriented. Vagueness at this stage compromises the entire analysis; the vision must be operationally defined to permit backward inference.</p><p>We emphasize at the outset that all threshold values, timeline dates, and structural specifications below are normative design hypotheses requiring multistakeholder validation, not empirically confirmed benchmarks. Their empirical validation is a core task for the formal multistakeholder Delphi process that constitutes the natural next phase of this research agenda.</p><p>We define the 2040 vision state as a health care ecosystem characterized by risk-stratified trust thresholds, that is, clinician-reported confidence in <italic>specific AI-assisted diagnostic outputs</italic>&#x2014;distinct from general willingness to adopt AI&#x2014;is assessed using validated trust-in-automation instruments adapted for clinical AI contexts [<xref ref-type="bibr" rid="ref9">9</xref>]. Rather than a single system-wide percentage, the 2040 vision adopts a <italic>risk-stratified trust matrix</italic> in which the required confidence level scales with the clinical task&#x2019;s autonomy level and potential consequence: for <italic>autonomous task execution</italic> (eg, routine medication reconciliation and simple triage flagging), a trust score of &#x2265;90% is required and for <italic>assistive decision support</italic> (eg, differential diagnosis generation and imaging interpretation), a score of 70% to 85% is acceptable contingent on human-in-the-loop verification. 
Final threshold values should be determined through specialty-stratified empirical validation and formal stakeholder consensus; if heterogeneous baselines are revealed, the road map may require adaptive pivot timelines.</p><p>These ranges are provisional normative anchors, not empirical measurements. Numerical specificity serves a methodological function in backcasting: it constrains the solution space and renders the vision state actionable for governance design. The specific values draw on adjacent automation trust literature [<xref ref-type="bibr" rid="ref9">9</xref>] and should be replaced by specialty-validated figures as they become available through formal consensus methods. These principles are reflected in the following core dimensions of the proposed vision state:</p><list list-type="bullet"><list-item><p>Semantic transparency&#x2014;AI diagnostic outputs are natively linked to verifiable clinical evidence, such that every AI-generated claim carries a computable confidence score grounded in institutional guidelines and peer-reviewed literature.</p></list-item><list-item><p>Integrated governance&#x2014;every major health system operates under a formally designated AI governance structure, including a CAIO, with accountability equivalent to the chief medical officer (CMO).</p></list-item><list-item><p>Futures literacy&#x2014;medical graduates are trained in HAT and basic foresight competencies as standard components of the clinical curriculum.</p></list-item></list><p>This vision state is not utopian. Each element has precedent in an adjacent domain: semantic transparency parallels drug labeling requirements, the CAIO mirrors the chief information security officer trajectory of the 2000s, and futures literacy training already exists in business and public policy education. 
The question is not whether this state is possible, but what sequence of structural interventions will produce it.</p><p>A key concern with backcasting in a rapidly evolving technological domain is whether the vision state end point remains stable across the road map horizon. We address this by defining the vision state at the institutional level&#x2014;governance structures, curriculum mandates, and trust thresholds&#x2014;rather than at the technological specification level. Institutional targets are more stable than their underlying technology substrates: they describe what the system must do (verify, govern, and educate), not how it does so. Drug labeling requirements, for example, have survived multiple reformulation cycles without revision to the underlying mandate. We therefore propose an adaptive checkpoint mechanism: at each pivot point, the road map&#x2019;s technological assumptions should be reviewed against the prevailing state of the art. If those assumptions have materially shifted&#x2014;for example, if the hallucination problem is addressed by architectural means that make local small language model (SLM) verification unnecessary&#x2014;the subsequent pivot timelines should be adjusted accordingly, while the vision state&#x2019;s institutional targets remain fixed.</p></sec><sec id="s5"><title>Working Backward: 3 Temporal Pivot Points</title><p>Backcasting proceeds by identifying <italic>pivot points</italic>&#x2014;the minimum set of structural changes that, if achieved by a given date, preserve the feasibility of the 2040 vision. 
We identify 3 such pivot points using a STEEP framework [<xref ref-type="bibr" rid="ref3">3</xref>] to ensure that each pivot is analyzed across all relevant dimensions.</p><sec id="s5-1"><title>The 2030 Pivot: The Verifiable AI Standard</title><p>The most proximate&#x2014;and technically tractable&#x2014;obstacle to the 2040 vision is the <italic>hallucination</italic> problem: the tendency of LLMs to generate plausible-sounding but factually incorrect clinical claims. Current mitigation strategies (retrieval-augmented generation and post hoc explainability tools) are insufficient because they operate retrospectively and rely on human review as the primary error-catching mechanism [<xref ref-type="bibr" rid="ref10">10</xref>].</p><p>We propose that the critical technological pivot point for 2030 is the standardization of dual-process AI architectures, conceptually grounded in dual-process theory [<xref ref-type="bibr" rid="ref11">11</xref>]. This framework includes the following components:</p><list list-type="bullet"><list-item><p>LLM (system 1)&#x2014;a general-purpose or fine-tuned LLM generates a rapid, high-breadth diagnostic hypothesis from unstructured clinical data (notes, laboratory results, and imaging reports).</p></list-item><list-item><p>SLM (system 2)&#x2014;a domain-embedded SLM, running on local hardware (on premises), cross-references the LLM&#x2019;s output against a curated institutional guideline corpus <italic>G</italic> and a real-time literature index. It acts as a preceptor, not overriding the LLM but quantifying the evidentiary support for each diagnostic claim. 
The feasibility of privacy-preserving, redundant local multiagent architectures for clinical tasks has been demonstrated in prototype form [<xref ref-type="bibr" rid="ref12">12</xref>] (<xref ref-type="fig" rid="figure1">Figure 1</xref> shows the 3 components).</p></list-item></list><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>Dual-process artificial intelligence architecture (the &#x201C;verifiable AI&#x201D; model)&#x2014;conceptual framework. Patient data are processed in parallel by a large language model (LLM; generative hypothesis generation) and a locally deployed small language model (SLM; evidence-grounded verification against institutional guidelines <italic>G</italic>). Their outputs are combined into a calibrated confidence score <italic>C(H | x, G)</italic>; claims below verification threshold <italic>&#x03B8;</italic> are flagged for mandatory clinician review. This figure depicts a proposed conceptual architecture; the confidence equation represents a formal specification requiring prospective validation in operational clinical environments before deployment. Accessibility description: flowchart showing patient data (electronic health record, laboratory results, and clinical notes) entering 2 parallel processing streams. The upper stream feeds an LLM labeled &#x201C;system 1/fast/creative&#x201D; for hypothesis generation. The lower stream feeds an on-premise SLM labeled &#x201C;system 2/slow/critical&#x201D; for guideline verification. Both streams converge at a calibrated confidence score node; outputs meeting or exceeding threshold <italic>&#x03B8;</italic> are displayed in the clinician interface, while outputs below <italic>&#x03B8;</italic> are flagged for mandatory review. 
EHR: electronic health record.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="jmir_v28i1e94234_fig01.png"/></fig><p>The output is a calibrated confidence score, defined formally as follows:</p><disp-formula id="E1"><mml:math id="eqn1"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mi>C</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>H</mml:mi><mml:mo>&#x2223;</mml:mo><mml:mi>x</mml:mi><mml:mo>,</mml:mo><mml:mi>G</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:mi>P</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>H</mml:mi><mml:mo>&#x2223;</mml:mo><mml:mi>x</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>&#x22C5;</mml:mo><mml:mrow><mml:mn mathvariant="double-struck">1</mml:mn></mml:mrow><mml:mtext>&#x00A0;</mml:mtext><mml:mo stretchy="false">[</mml:mo><mml:msub><mml:mi>S</mml:mi><mml:mrow><mml:mi>S</mml:mi><mml:mi>L</mml:mi><mml:mi>M</mml:mi></mml:mrow></mml:msub><mml:mo stretchy="false">(</mml:mo><mml:mi>H</mml:mi><mml:mo>,</mml:mo><mml:mi>G</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>&#x2265;</mml:mo><mml:mi>&#x03B8;</mml:mi><mml:mo stretchy="false">]</mml:mo></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><p>where <italic>H</italic> is the diagnostic hypothesis; <italic>x</italic> is the clinical input vector; <italic>P(H | x)</italic> is the LLM posterior; <italic>S</italic><sub><italic>SLM</italic></sub><italic>(H,G)</italic> is the SLM-derived guideline alignment score; <italic>&#x03B8;</italic> is a system-configured verification threshold; and <italic>&#x1D7D9;[&#x00B7;]</italic> is the indicator function, equal to 1 when the bracketed condition is satisfied and 0 otherwise. Claims falling below the threshold are flagged for mandatory clinician review rather than being silently passed to the interface. 
This formulation reflects a gating specification for conceptual clarity; alternative continuous calibration formulations (eg, weighted confidence scaling) may be explored in empirical implementation. Preliminary empirical support for this calibration approach was demonstrated in a validation study of 6689 cardiovascular cases from the Medical Information Mart for Intensive Care-III dataset, in which a dynamic confidence and transparency scoring framework reduced clinician override rates to 33.3%, with high-confidence predictions (90%&#x2010;99% confidence) overridden at only 1.7% [<xref ref-type="bibr" rid="ref13">13</xref>]&#x2014;representing a 20-fold reduction in override frequency at the highest confidence tier, a finding that suggests clinicians are meaningfully responsive to calibrated confidence signals when they are made explicitly visible. Analogous calibration challenges have been independently documented across diverse clinical AI systems, suggesting that structured verification layers represent a generalizable architectural need rather than a system-specific fix [<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref15">15</xref>]. 
Federated learning frameworks for medical SLMs further demonstrate that locally deployed verification models can be collaboratively trained across heterogeneous institutions&#x2014;with drift-aware rank scheduling to handle non&#x2013;independent and identically distributed clinical data distributions&#x2014;while preserving data locality, providing a practical cross-institutional update pathway for the SLM component of the dual-process architecture [<xref ref-type="bibr" rid="ref16">16</xref>].</p><p>The critical structural condition for this pivot to occur by 2030 is not technological&#x2014;SLMs capable of clinical guideline alignment already exist in prototype form&#x2014;but <italic>regulatory</italic>: progressive movement toward formal minimum verification requirements for AI-assisted clinical decision support, analogous to the postmarket surveillance requirements applied to medical devices.</p></sec><sec id="s5-2"><title>The 2035 Pivot: The Agentic Orchestration Shift</title><p>The 2030 pivot establishes the technical precondition for trust. The 2035 pivot concerns the institutional operationalization of that technical foundation. By 2035, under this road map, AI is envisioned to have evolved from a &#x201C;consultant&#x201D; role&#x2014;passively responding to queries&#x2014;to an <italic>agentic orchestrator</italic>: autonomously managing longitudinal care tasks such as postdischarge monitoring, medication reconciliation, and chronic disease management workflows [<xref ref-type="bibr" rid="ref12">12</xref>].</p><p>This shift creates governance risks that current health care leadership structures are not designed to manage. Chief information officers (CIOs) lack clinical authority; CMOs lack AI technical literacy. 
The emerging CAIO role represents the necessary institutional innovation: a formally credentialed position combining clinical expertise, AI technical competency, and governance authority to certify model safety, audit local calibration, and set institutional AI policy.</p><p>The CAIO is not intended to replace existing executive roles. Rather, the role bridges the CMO&#x2019;s clinical accountability and the CIO&#x2019;s technical infrastructure authority, with specific responsibility for (1) AI model certification and recertification, (2) local calibration auditing and equity reporting, and (3) institutional AI governance policy&#x2014;functions currently unhoused in either the CMO or CIO role. The CAIO would report jointly to the CMO and the board&#x2019;s quality and safety committee, with dotted line coordination with the CIO and chief digital officer on technology procurement and data architecture.</p><p>The STEEP analysis at this pivot reveals that the primary barrier is not technological but <italic>political</italic>: medical licensing boards, accreditation bodies, and malpractice liability frameworks should be revised to recognize AI-assisted decisions as a category of collaborative clinical output, distinct from both autonomous device operation and unassisted physician judgment [<xref ref-type="bibr" rid="ref17">17</xref>].</p><p>The <italic>economic</italic> and <italic>social</italic> dimensions of this pivot are equally consequential. Economically, the CAIO role and its associated governance infrastructure represent a substantial new cost center for health systems; the business case should be grounded in demonstrable efficiency gains from agentic AI&#x2014;reduced administrative burden, improved care coordination, and averted adverse events&#x2014;at a scale sufficient to offset implementation costs. 
For smaller systems without the resources to sustain a full CAIO function, &#x201C;CAIO-as-a-service&#x201D; models offered through regional health authorities or collaborative networks may provide a viable alternative pathway. Socially, the professional identity of the CAIO remains undefined: whether the role is best filled by a clinician with advanced data science training, a computer scientist with a public health qualification, or an entirely new hybrid professional developed through dedicated graduate programs has direct implications for how the 2040 educational pivot should be designed. Accreditation bodies and medical schools should begin addressing this pipeline question within the current planning horizon.</p></sec><sec id="s5-3"><title>The 2040 Pivot: Futures Literacy as a Clinical Competency</title><p>The final pivot is structural and educational. The 2040 vision state requires clinicians who are not merely passive recipients of AI tools but active participants in shaping the technological environments in which they practice. Integrating futures literacy into medical education requires no new infrastructure; it requires a curriculum decision. 
Specifically, we propose the incorporation of 3 elements: (1) scenario analysis into clinical reasoning curricula, replacing some linear differential diagnosis training with probabilistic, multifuture reasoning [<xref ref-type="bibr" rid="ref18">18</xref>]; (2) HAT modules into clerkship training; and (3) foresight workshops into medical leadership development programs, parallel to the quality improvement and health systems science competencies already mandated by accreditation bodies [<xref ref-type="bibr" rid="ref19">19</xref>].</p></sec></sec><sec id="s6"><title>Contingency Logic and Adaptive Pathways</title><p>The 3 pivot points above are presented as a dependency-ordered sequence, but structured backcasting acknowledges that execution may deviate from normative timelines [<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref5">5</xref>]. We therefore specify a contingency logic for each pivot. If the 2030 verifiable AI standard is not achieved by the target date&#x2014;for example, because regulatory standardization is delayed&#x2014;an interim substitute exists: a voluntary industry consortium standard (analogous to the Health Level 7 Fast Healthcare Interoperability Resources interoperability framework) could provide provisional verification requirements while formal regulation catches up, with the pivot window extended to 2033 and subsequent timelines compressed. If the 2035 agentic governance pivot is delayed, the CAIO-as-a-service model already described provides a distributed interim pathway that does not require institutional mandates. If the 2040 futures literacy integration has not been achieved, the vision state trust thresholds should be treated as aspirational targets rather than confirmed baselines, and the multistakeholder Delphi process proposed in this Viewpoint becomes the primary recalibration mechanism. 
At each pivot point, a structured reassessment&#x2014;rather than abandonment of the vision state&#x2014;is the appropriate response to schedule deviation.</p></sec><sec id="s7"><title>The Backcasting Timeline</title><p><xref ref-type="fig" rid="figure2">Figure 2</xref> summarizes the 3 pivot points and their relationship to the 2026 baseline and 2040 vision state. <xref ref-type="table" rid="table1">Table 1</xref> provides a cross-pivot summary of the required changes across the STEEP framework.</p><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>Backcasting timeline: 2026 baseline to 2040 vision state. The dashed arrow represents the normative backcasting direction (vision to present). Open circles mark pivot points where structural interventions should be achieved. The 2040 node carries dual status as pivot 3 (futures literacy) and vision state terminal milestone. AI: artificial intelligence; CAIO: chief artificial intelligence officer; STEEP: social, technological, economic, environmental, and political.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="jmir_v28i1e94234_fig02.png"/></fig><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Temporal milestones across the social, technological, economic, environmental, and political framework.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Dimension</td><td align="left" valign="bottom">2026 (baseline)</td><td align="left" valign="bottom">2030 (verifiable)</td><td align="left" valign="bottom">2035 (agentic)</td><td align="left" valign="bottom">2040 (vision)</td></tr></thead><tbody><tr><td align="left" valign="top">Social</td><td align="left" valign="top">Clinician skepticism and automation fear</td><td align="left" valign="top">SLM<sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup> explainability builds baseline trust</td><td align="left" 
valign="top">AI<sup><xref ref-type="table-fn" rid="table1fn2">b</xref></sup> fluency enters professional identity</td><td align="left" valign="top">Futures literacy standard</td></tr><tr><td align="left" valign="top">Technological</td><td align="left" valign="top">General LLMs<sup><xref ref-type="table-fn" rid="table1fn3">c</xref></sup> and no verification layer</td><td align="left" valign="top">Dual-process architecture and on-premises SLM</td><td align="left" valign="top">Agentic orchestrators and ambient monitoring</td><td align="left" valign="top">Seamless XAI<sup><xref ref-type="table-fn" rid="table1fn4">d</xref></sup> and ambient intelligence</td></tr><tr><td align="left" valign="top">Economic</td><td align="left" valign="top">Pilot budgets and no billing pathways</td><td align="left" valign="top">SLM hardware ROI<sup><xref ref-type="table-fn" rid="table1fn5">e</xref></sup> established</td><td align="left" valign="top">AI-specific billing codes introduced</td><td align="left" valign="top">System-level AI cost integration</td></tr><tr><td align="left" valign="top">Political</td><td align="left" valign="top">Fragmented and IT-led governance</td><td align="left" valign="top">Regulatory movement toward formal verification requirements</td><td align="left" valign="top">CAIO<sup><xref ref-type="table-fn" rid="table1fn6">f</xref></sup> mandate and liability frameworks revised</td><td align="left" valign="top">AI-native governance as standard</td></tr><tr><td align="left" valign="top">Environmental</td><td align="left" valign="top">Cloud dependent and high energy cost</td><td align="left" valign="top">On-premises inference reduces footprint</td><td align="left" valign="top">Local edge computing architecture</td><td align="left" valign="top">Distributed, low-latency AI fabric</td></tr></tbody></table><table-wrap-foot><fn id="table1fn1"><p><sup>a</sup>SLM: small language model.</p></fn><fn id="table1fn2"><p><sup>b</sup>AI: artificial intelligence.</p></fn><fn 
id="table1fn3"><p><sup>c</sup>LLM: large language model.</p></fn><fn id="table1fn4"><p><sup>d</sup>XAI: explainable artificial intelligence.</p></fn><fn id="table1fn5"><p><sup>e</sup>ROI: return on investment.</p></fn><fn id="table1fn6"><p><sup>f</sup>CAIO: chief artificial intelligence officer.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s8" sec-type="discussion"><title>Discussion</title><sec id="s8-1"><title>The Automation Bias Risk</title><p>The central finding of this analysis is that the AI trust gap in clinical medicine is not a technical problem awaiting a technical solution, but an institutional design problem amenable to institutional design interventions. Three time-bound structural pivot points&#x2014;verifiable AI (2030), agentic governance (2035), and futures literacy (2040)&#x2014;constitute the minimum set of structural changes required to achieve the defined 2040 vision state. The following subsections examine the second-order risks and implementation considerations that the road map itself generates.</p><p>Backcasting toward greater AI integration must contend with a well-documented risk: as systems become more reliable, clinicians may exhibit automation bias, accepting AI outputs without appropriate critical evaluation [<xref ref-type="bibr" rid="ref6">6</xref>]. Systematic reviews of automation bias in clinical decision support have documented its frequency across specialties and identified key mediating factors, including interface design and task complexity [<xref ref-type="bibr" rid="ref20">20</xref>]. Paradoxically, the more successful the dual-process architecture becomes at reducing errors, the more it may erode the baseline clinical reasoning skills required to function safely during system failures.</p><p>The structural response is not to slow AI integration, but to design the verification layer as pedagogically visible. 
An SLM that surfaces its reasoning&#x2014;linking each diagnostic flag to a specific guideline citation&#x2014;functions simultaneously as a safety guardrail and a real-time teaching tool [<xref ref-type="bibr" rid="ref21">21</xref>]. Critically, this requires a design commitment beyond mere filtering: the SLM should be presented to clinicians as a transparent reasoning partner, not an opaque gatekeeper, so that every interaction reinforces rather than replaces clinical judgment. Longitudinal exposure to SLM-sourced guideline citations can thereby function as embedded continuing medical education, building the domain fluency that the 2040 futures literacy pivot requires. Clinical reasoning and AI collaboration are not zero-sum; they are mutually reinforcing when the verification architecture is made visible.</p></sec><sec id="s8-2"><title>Algorithmic Equity and Siloed Bias</title><p>The shift from centralized cloud LLMs to locally deployed SLMs introduces a specific equity risk. If an institutional SLM is trained primarily on a single health system&#x2019;s demographic, it may produce well-calibrated outputs for that population and systematically miscalibrated outputs for underrepresented groups [<xref ref-type="bibr" rid="ref22">22</xref>]. The CAIO role must therefore explicitly manage the tension between local optimization and national equity benchmarks&#x2014;a trade-off that will intensify as local models diverge in their calibration profiles across demographically distinct health systems.</p><p>We propose 4 concrete structural safeguards. First, an equity audit mandate for the CAIO requires annual recalibration of local SLMs against nationally representative benchmarks, with mandatory public reporting of disaggregated performance metrics stratified by age, race, sex, and socioeconomic status. Enforcement authority would vest in the institutional CAIO, accountable to the same accreditation bodies that currently oversee quality improvement programs. 
Second, a <italic>cross-institutional calibration exchange</italic>&#x2014;analogous to federated model benchmarking frameworks piloted in oncology imaging AI [<xref ref-type="bibr" rid="ref8">8</xref>] and enabled technically by drift-aware federated SLM training [<xref ref-type="bibr" rid="ref16">16</xref>]&#x2014;would allow health systems to share calibration data without compromising data sovereignty or patient privacy; participation would require standardized data use agreements modeled on existing federated research network governance (eg, PCORnet). Third, a <italic>public transparency layer</italic> requiring institutions to publish SLM performance statistics across demographic strata would create accountability pressure independent of regulatory enforcement. Fourth, <italic>minimum training data diversity requirements</italic> for locally deployed SLMs&#x2014;aligned with emerging FDA guidance on AI- or machine learning&#x2013;based software as a medical device [<xref ref-type="bibr" rid="ref23">23</xref>]&#x2014;would establish upstream protections against demographically narrow calibration before deployment. We acknowledge, however, that these safeguards create a structural tension: local optimization for institutional demographics and national equity benchmarks may not be simultaneously achievable, and overly prescriptive diversity mandates risk regulatory fragmentation across health systems. Resolving this tension is a governance research priority for the 2035 pivot.</p></sec><sec id="s8-3"><title>Data Sovereignty and Medical Agency</title><p>As agentic AI orchestrates longitudinal care decisions autonomously, the current legal framework becomes structurally incoherent: it assigns individual physician liability for all diagnostic and treatment decisions, a structure incompatible with distributed AI-human agency. 
The road map&#x2019;s 2035 pivot requires a parallel legal innovation: a framework for <italic>collaborative medical agency</italic> that assigns shared accountability across the human clinician, the institution&#x2019;s CAIO, and the AI system&#x2019;s certified governance record.</p><p>Aviation&#x2019;s joint liability structures for automated cockpit systems offer a suggestive&#x2014;though imperfect&#x2014;precedent. The analogy has important limitations: aviation operates under a single unified global regulatory body (International Civil Aviation Organization), whereas medicine is governed by fragmented national and regional authorities (FDA, European Medicines Agency, and others) with incompatible approval pathways. Moreover, the aviation cockpit pairs 2 human pilots with a bounded automated system; the clinical &#x201C;cockpit&#x201D; may involve 1 clinician simultaneously managing multiple AI agents across care domains.</p><p>Despite these differences, several concrete mechanisms from aviation governance translate meaningfully to clinical AI. First, a <italic>decision record log</italic>&#x2014;analogous to a flight data recorder&#x2014;could capture the AI system&#x2019;s inputs, confidence scores, and flagged uncertainties for every consequential clinical recommendation, creating an auditable governance trail for adverse event review. Second, new <italic>institutional malpractice instruments</italic> could name the health system&#x2019;s CAIO as a coinsured party for AI-assisted decisions, distributing liability in proportion to the documented quality of the institutional governance record rather than assigning it entirely to the attending clinician. 
Third, a <italic>graduated autonomy protocol</italic>&#x2014;mirroring aviation&#x2019;s phased certification of autopilot authority at different flight stages&#x2014;could specify the conditions under which agentic AI is permitted to act without real-time human confirmation, with those conditions contractually tied to demonstrated calibration performance. Sketching these mechanisms now is speculative; their legal operationalization is, by definition, a task for the 2035 pivot.</p></sec><sec id="s8-4"><title>The Parallel Path of Patient Trust</title><p>This road map is explicitly clinician centric: its vision state is defined by clinician confidence in AI-assisted outputs, and its pivot points target the technical, institutional, and educational conditions for clinician adoption. However, the ultimate goal of that adoption is improved patient outcomes, and patient acceptance of AI-mediated care is a potentially rate-limiting variable that the road map cannot ignore. A clinician who trusts an AI-generated diagnosis may still encounter a patient who rejects it on the grounds that it originated from a &#x201C;robot&#x201D;&#x2014;a concern with documented precedent in digital health adoption literature [<xref ref-type="bibr" rid="ref1">1</xref>].</p><p>The 3 pivot points each carry patient trust implications. At the 2030 pivot, the semantic transparency requirement&#x2014;that every AI-generated claim carries a computable confidence score linked to verifiable evidence&#x2014;could be extended to produce patient-facing explanations: plain-language summaries of why the AI reached a given conclusion, designed to accommodate variability in health literacy. At the 2035 pivot, the CAIO&#x2019;s governance mandate should include a patient education function: communicating institutional AI policies, audit outcomes, and safeguard mechanisms to patients and communities in accessible formats. 
By the 2040 vision state, futures literacy in medical education should encompass shared decision-making frameworks for AI-assisted diagnosis, training clinicians to interpret AI outputs collaboratively with patients rather than presenting them as authoritative verdicts. A full account of patient trust is outside the scope of this Viewpoint but represents a necessary parallel research agenda.</p></sec><sec id="s8-5"><title>Operational Implementation Barriers</title><p>Several operational implementation barriers require explicit acknowledgment beyond the governance and regulatory dimensions already discussed. First, interoperability with legacy electronic health record systems presents a significant technical challenge: locally deployed SLMs must integrate with existing Health Level 7 or Fast Healthcare Interoperability Resources stacks and clinical data repositories through vendor-neutral application programming interfaces, a requirement that presupposes standards that do not yet uniformly exist. Second, workflow burden is nontrivial; a verification layer that introduces perceptible latency at the point of care risks clinician work-arounds that negate its safety function. Third, institutional digital maturity is highly variable: rural and safety-net health systems may lack the compute infrastructure required to run local SLMs, making the CAIO-as-a-service model described above a structural prerequisite rather than an optional alternative for a substantial portion of the health care system. Fourth, medico-legal fragmentation across jurisdictions means that governance frameworks developed for the US regulatory context will require substantial adaptation in the European Union, the United Kingdom, and other settings with different medical device and liability regimes. 
Fifth, procurement constraints in smaller health systems&#x2014;where capital budget cycles operate on 5-year timelines&#x2014;may not align with the decade-scale pivot timelines proposed here; phased procurement schedules and regional consortium purchasing agreements may be necessary to close this gap. Addressing these barriers is a practical prerequisite for the road map&#x2019;s transition from normative design to operational deployment.</p></sec><sec id="s8-6"><title>Limitations</title><p>This Viewpoint has 5 principal limitations. First, backcasting is a normative, not predictive, method; the pivot points identified here are necessary conditions for the 2040 vision, not guaranteed outcomes. Second, the 2040 vision state and its risk-stratified trust thresholds (&#x2265;90% for autonomous tasks; 70%&#x2010;85% for assistive support) reflect expert judgment rather than empirical consensus [<xref ref-type="bibr" rid="ref13">13</xref>]; future work should establish this vision through a formal multistakeholder Delphi process. Third, this analysis was conducted from a single professional vantage point; patient scholars, frontline nurses, and perspectives from low-resource settings are underrepresented. Fourth, the dual-process architecture requires empirical validation against real clinical datasets to establish whether SLM verification demonstrably improves the calibration of LLM diagnostic outputs. We regard this limitation as a tractable research priority and plan to address it in subsequent work using deidentified clinical data. Fifth, there is a geographic scope limitation: the governance mechanisms, liability frameworks, and accreditation references in this road map are drawn primarily from the US regulatory context (FDA, Accreditation Council for Graduate Medical Education, and state-level malpractice law). 
Readers in the European Union, the United Kingdom, or other jurisdictions will find the technical and educational pivot points broadly applicable but should adapt the political and legal dimensions to their national regulatory frameworks. A multijurisdictional extension of this road map is a planned next step.</p></sec></sec><sec id="s9" sec-type="conclusions"><title>Conclusions</title><p>The trust gap between AI capability and clinical adoption is neither technical nor inevitable. It is an institutional design problem, and it requires institutional design solutions. Backcasting transforms the question from &#x201C;when will AI be ready for medicine?&#x201D; to &#x201C;what must we build to make medicine ready for AI?&#x201D; The 3 pivot points identified here&#x2014;verifiable AI by 2030, agentic governance by 2035, and futures literacy by 2040&#x2014;are not predictions; they are structural commitments that clinicians, health system leaders, and policymakers can begin building today.</p><p>Medical futures studies offer the community of practice needed to sustain this work. We invite clinicians, researchers, and AI developers to engage formally with backcasting, scenario planning, and other foresight methods&#x2014;not as academic exercises, but as essential planning instruments for building institutional capacity to shape health care&#x2019;s technological future. We additionally propose a multi-institutional consortium to prospectively evaluate the dual-process architecture using deidentified electronic health record data across diverse health systems, with the goal of establishing the empirical pivot evidence base that this normative road map cannot itself provide. The road map offered here is intended as a structured starting point for that institutional work.</p></sec></body><back><ack><p>The author would like to thank the guest editors of this theme issue, Dr. Bertalan Mesk&#x00F3; and Dr. 
Tam&#x00E1;s Krist&#x00F3;f, whose 2024 scoping review [<xref ref-type="bibr" rid="ref2">2</xref>] directly motivated this work.</p></ack><notes><sec><title>Funding</title><p>This research received no external funding.</p></sec></notes><fn-group><fn fn-type="con"><p>YY conceived the study, conducted the backcasting analysis, and wrote the manuscript in its entirety.</p></fn><fn fn-type="conflict"><p>YY is employed by Zyter|TruCare, a health technology company. The proposed dual-process artificial intelligence (AI) architecture and chief AI officer role are conceptual contributions to the scholarly literature and do not describe, endorse, or promote any product or service offered by YY's employer.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">AI</term><def><p>artificial intelligence</p></def></def-item><def-item><term id="abb2">CAIO</term><def><p>chief artificial intelligence officer</p></def></def-item><def-item><term id="abb3">CIO</term><def><p>chief information officer</p></def></def-item><def-item><term id="abb4">CMO</term><def><p>chief medical officer</p></def></def-item><def-item><term id="abb5">FDA</term><def><p>Food and Drug Administration</p></def></def-item><def-item><term id="abb6">HAT</term><def><p>human-AI teaming</p></def></def-item><def-item><term id="abb7">LLM</term><def><p>large language model</p></def></def-item><def-item><term id="abb8">SLM</term><def><p>small language model</p></def></def-item><def-item><term id="abb9">STEEP</term><def><p>social, technological, economic, environmental, and political</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Topol</surname><given-names>EJ</given-names> </name></person-group><article-title>High-performance medicine: the convergence of human and artificial 
intelligence</article-title><source>Nat Med</source><year>2019</year><month>01</month><volume>25</volume><issue>1</issue><fpage>44</fpage><lpage>56</lpage><pub-id pub-id-type="doi">10.1038/s41591-018-0300-7</pub-id><pub-id pub-id-type="medline">30617339</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Mesk&#x00F3;</surname><given-names>B</given-names> </name><name name-style="western"><surname>Krist&#x00F3;f</surname><given-names>T</given-names> </name><name name-style="western"><surname>Dhunnoo</surname><given-names>P</given-names> </name><name name-style="western"><surname>&#x00C1;rvai</surname><given-names>N</given-names> </name><name name-style="western"><surname>Katonai</surname><given-names>G</given-names> </name></person-group><article-title>Exploring the need for medical futures studies: insights from a scoping review of health care foresight</article-title><source>J Med Internet Res</source><year>2024</year><month>10</month><day>9</day><volume>26</volume><fpage>e57148</fpage><pub-id pub-id-type="doi">10.2196/57148</pub-id><pub-id pub-id-type="medline">39383528</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Popper</surname><given-names>R</given-names> </name></person-group><article-title>How are foresight methods selected?</article-title><source>Foresight</source><year>2008</year><month>10</month><volume>10</volume><issue>6</issue><fpage>62</fpage><lpage>89</lpage><pub-id pub-id-type="doi">10.1108/14636680810918586</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Robinson</surname><given-names>JB</given-names> </name></person-group><article-title>Unlearning and backcasting: rethinking 
some of the questions we ask about the future</article-title><source>Technol Forecast Soc Change</source><year>1988</year><month>07</month><volume>33</volume><issue>4</issue><fpage>325</fpage><lpage>338</lpage><pub-id pub-id-type="doi">10.1016/0040-1625(88)90029-7</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Dreborg</surname><given-names>KH</given-names> </name></person-group><article-title>Essence of backcasting</article-title><source>Futures</source><year>1996</year><month>11</month><volume>28</volume><issue>9</issue><fpage>813</fpage><lpage>828</lpage><pub-id pub-id-type="doi">10.1016/S0016-3287(96)00044-4</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Char</surname><given-names>DS</given-names> </name><name name-style="western"><surname>Shah</surname><given-names>NH</given-names> </name><name name-style="western"><surname>Magnus</surname><given-names>D</given-names> </name></person-group><article-title>Implementing machine learning in health care - addressing ethical challenges</article-title><source>N Engl J Med</source><year>2018</year><month>03</month><day>15</day><volume>378</volume><issue>11</issue><fpage>981</fpage><lpage>983</lpage><pub-id pub-id-type="doi">10.1056/NEJMp1714229</pub-id><pub-id pub-id-type="medline">29539284</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Singhal</surname><given-names>K</given-names> </name><name name-style="western"><surname>Azizi</surname><given-names>S</given-names> </name><name name-style="western"><surname>Tu</surname><given-names>T</given-names> </name><etal/></person-group><article-title>Large language models encode clinical 
knowledge</article-title><source>Nature</source><year>2023</year><month>08</month><volume>620</volume><issue>7972</issue><fpage>172</fpage><lpage>180</lpage><pub-id pub-id-type="doi">10.1038/s41586-023-06291-2</pub-id><pub-id pub-id-type="medline">37438534</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Rajpurkar</surname><given-names>P</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>E</given-names> </name><name name-style="western"><surname>Banerjee</surname><given-names>O</given-names> </name><name name-style="western"><surname>Topol</surname><given-names>EJ</given-names> </name></person-group><article-title>AI in health and medicine</article-title><source>Nat Med</source><year>2022</year><month>01</month><volume>28</volume><issue>1</issue><fpage>31</fpage><lpage>38</lpage><pub-id pub-id-type="doi">10.1038/s41591-021-01614-0</pub-id><pub-id pub-id-type="medline">35058619</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Jian</surname><given-names>JY</given-names> </name><name name-style="western"><surname>Bisantz</surname><given-names>AM</given-names> </name><name name-style="western"><surname>Drury</surname><given-names>CG</given-names> </name></person-group><article-title>Foundations for an empirically determined scale of trust in automated systems</article-title><source>Int J Cogn Ergon</source><year>2000</year><month>03</month><volume>4</volume><issue>1</issue><fpage>53</fpage><lpage>71</lpage><pub-id pub-id-type="doi">10.1207/S15327566IJCE0401_04</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Amann</surname><given-names>J</given-names> </name><name 
name-style="western"><surname>Blasimme</surname><given-names>A</given-names> </name><name name-style="western"><surname>Vayena</surname><given-names>E</given-names> </name><name name-style="western"><surname>Frey</surname><given-names>D</given-names> </name><name name-style="western"><surname>Madai</surname><given-names>VI</given-names> </name><collab>Precise4Q consortium</collab></person-group><article-title>Explainability for artificial intelligence in healthcare: a multidisciplinary perspective</article-title><source>BMC Med Inform Decis Mak</source><year>2020</year><month>11</month><day>30</day><volume>20</volume><issue>1</issue><fpage>310</fpage><pub-id pub-id-type="doi">10.1186/s12911-020-01332-6</pub-id><pub-id pub-id-type="medline">33256715</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Kahneman</surname><given-names>D</given-names> </name></person-group><source>Thinking, Fast and Slow</source><year>2011</year><publisher-name>Farrar, Straus and Giroux</publisher-name><pub-id pub-id-type="other">9781429969352</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Yu</surname><given-names>Y</given-names> </name></person-group><article-title>Hybrid-code: a privacy-preserving, redundant multi-agent framework for reliable local clinical coding</article-title><source>arXiv</source><access-date>2026-04-20</access-date><comment>Preprint posted online on  Dec 26, 2025</comment><comment><ext-link ext-link-type="uri" xlink:href="https://arxiv.org/html/2512.23743v1">https://arxiv.org/html/2512.23743v1</ext-link></comment></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Yu</surname><given-names>Y</given-names> 
</name><name name-style="western"><surname>Gomez-Cabello</surname><given-names>CA</given-names> </name><name name-style="western"><surname>Haider</surname><given-names>SA</given-names> </name><etal/></person-group><article-title>Enhancing clinician trust in AI diagnostics: a dynamic framework for confidence calibration and transparency</article-title><source>Diagnostics (Basel)</source><year>2025</year><month>08</month><day>30</day><volume>15</volume><issue>17</issue><fpage>2204</fpage><pub-id pub-id-type="doi">10.3390/diagnostics15172204</pub-id><pub-id pub-id-type="medline">40941691</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Guo</surname><given-names>C</given-names> </name><name name-style="western"><surname>Pleiss</surname><given-names>G</given-names> </name><name name-style="western"><surname>Sun</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Weinberger</surname><given-names>KQ</given-names> </name></person-group><article-title>On calibration of modern neural networks</article-title><access-date>2026-04-28</access-date><conf-name>Proceedings of the 34th International Conference on Machine Learning</conf-name><conf-date>Aug 6-11, 2017</conf-date><comment><ext-link ext-link-type="uri" xlink:href="https://proceedings.mlr.press/v70/guo17a/guo17a.pdf">https://proceedings.mlr.press/v70/guo17a/guo17a.pdf</ext-link></comment></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Moor</surname><given-names>M</given-names> </name><name name-style="western"><surname>Banerjee</surname><given-names>O</given-names> </name><name name-style="western"><surname>Abad</surname><given-names>ZSH</given-names> </name><etal/></person-group><article-title>Foundation models for generalist medical artificial 
intelligence</article-title><source>Nature</source><year>2023</year><month>04</month><volume>616</volume><issue>7956</issue><fpage>259</fpage><lpage>265</lpage><pub-id pub-id-type="doi">10.1038/s41586-023-05881-4</pub-id><pub-id pub-id-type="medline">37045921</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Yu</surname><given-names>Y</given-names> </name></person-group><article-title>AdaptiveFedLoRA: drift-aware adaptive LoRA rank scheduling for federated medical small language models</article-title><source>medRxiv</source><comment>Preprint posted online on Jan 21, 2026</comment><pub-id pub-id-type="doi">10.64898/2026.01.18.26344237</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Price</surname><given-names>WN</given-names> <suffix>II</suffix></name><name name-style="western"><surname>Gerke</surname><given-names>S</given-names> </name><name name-style="western"><surname>Cohen</surname><given-names>IG</given-names> </name></person-group><article-title>Potential liability for physicians using artificial intelligence</article-title><source>JAMA</source><year>2019</year><month>11</month><day>12</day><volume>322</volume><issue>18</issue><fpage>1765</fpage><lpage>1766</lpage><pub-id pub-id-type="doi">10.1001/jama.2019.15064</pub-id><pub-id pub-id-type="medline">31584609</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wartman</surname><given-names>SA</given-names> </name><name name-style="western"><surname>Combs</surname><given-names>CD</given-names> </name></person-group><article-title>Medical education must move from the information age to the age of artificial 
intelligence</article-title><source>Acad Med</source><year>2018</year><month>08</month><volume>93</volume><issue>8</issue><fpage>1107</fpage><lpage>1109</lpage><pub-id pub-id-type="doi">10.1097/ACM.0000000000002044</pub-id><pub-id pub-id-type="medline">29095704</pub-id></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Miller</surname><given-names>R</given-names> </name></person-group><source>Transforming the Future: Anticipation in the 21st Century</source><year>2018</year><publisher-name>Routledge</publisher-name><pub-id pub-id-type="other">9781351048002</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Goddard</surname><given-names>K</given-names> </name><name name-style="western"><surname>Roudsari</surname><given-names>A</given-names> </name><name name-style="western"><surname>Wyatt</surname><given-names>JC</given-names> </name></person-group><article-title>Automation bias: a systematic review of frequency, effect mediators, and mitigators</article-title><source>J Am Med Inform Assoc</source><year>2012</year><volume>19</volume><issue>1</issue><fpage>121</fpage><lpage>127</lpage><pub-id pub-id-type="doi">10.1136/amiajnl-2011-000089</pub-id><pub-id pub-id-type="medline">21685142</pub-id></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Miller</surname><given-names>T</given-names> </name></person-group><article-title>Explanation in artificial intelligence: insights from the social sciences</article-title><source>Artif Intell</source><year>2019</year><month>02</month><volume>267</volume><fpage>1</fpage><lpage>38</lpage><pub-id pub-id-type="doi">10.1016/j.artint.2018.07.007</pub-id></nlm-citation></ref><ref 
id="ref22"><label>22</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Obermeyer</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Powers</surname><given-names>B</given-names> </name><name name-style="western"><surname>Vogeli</surname><given-names>C</given-names> </name><name name-style="western"><surname>Mullainathan</surname><given-names>S</given-names> </name></person-group><article-title>Dissecting racial bias in an algorithm used to manage the health of populations</article-title><source>Science</source><year>2019</year><month>10</month><day>25</day><volume>366</volume><issue>6464</issue><fpage>447</fpage><lpage>453</lpage><pub-id pub-id-type="doi">10.1126/science.aax2342</pub-id><pub-id pub-id-type="medline">31649194</pub-id></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="web"><article-title>Artificial intelligence/machine learning (AI/ML)-based software as a medical device (SaMD) action plan</article-title><source>U.S. Food and Drug Administration</source><year>2021</year><access-date>2026-04-20</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.fda.gov/media/145022/download">https://www.fda.gov/media/145022/download</ext-link></comment></nlm-citation></ref></ref-list></back></article>