<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="article-commentary"><front><journal-meta><journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id><journal-id journal-id-type="publisher-id">jmir</journal-id><journal-id journal-id-type="index">1</journal-id><journal-title>Journal of Medical Internet Research</journal-title><abbrev-journal-title>J Med Internet Res</abbrev-journal-title><issn pub-type="epub">1438-8871</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v28i1e97859</article-id><article-id pub-id-type="doi">10.2196/97859</article-id><article-categories><subj-group subj-group-type="heading"><subject>Commentary</subject></subj-group></article-categories><title-group><article-title>From Data Stewardship to Model Stewardship: Extending Governance Frameworks for AI Era Health Data Use</article-title></title-group><contrib-group><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Rozenblit</surname><given-names>Leon</given-names></name><degrees>JD, PhD</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Labkoff</surname><given-names>Steven</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff2">2</xref><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Safran</surname><given-names>Charles</given-names></name><degrees>MS, MD</degrees><xref ref-type="aff" rid="aff2">2</xref><xref ref-type="aff" rid="aff4">4</xref></contrib></contrib-group><aff 
id="aff1"><institution>Q.E.D. Institute</institution><addr-line>New Haven</addr-line><addr-line>CT</addr-line><country>United States</country></aff><aff id="aff2"><institution>Division of Clinical Informatics, Beth Israel Deaconess Medical Center</institution><addr-line>133 Brookline Avenue, HVMA Annex, Suite 2200</addr-line><addr-line>Boston</addr-line><addr-line>MA</addr-line><country>United States</country></aff><aff id="aff3"><institution>Luminant Consulting</institution><addr-line>Stamford</addr-line><addr-line>CT</addr-line><country>United States</country></aff><aff id="aff4"><institution>Department of Medicine, Harvard Medical School</institution><addr-line>Boston</addr-line><addr-line>MA</addr-line><country>United States</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Leung</surname><given-names>Tiffany</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Leon Rozenblit, JD, PhD, Division of Clinical Informatics, Beth Israel Deaconess Medical Center, 133 Brookline Avenue, HVMA Annex, Suite 2200, Boston, MA, 02215, United States, 1 617-278-8162; <email>lrozenbl@bidmc.harvard.edu</email></corresp></author-notes><pub-date pub-type="collection"><year>2026</year></pub-date><pub-date pub-type="epub"><day>5</day><month>6</month><year>2026</year></pub-date><volume>28</volume><elocation-id>e97859</elocation-id><history><date date-type="received"><day>10</day><month>04</month><year>2026</year></date><date date-type="rev-recd"><day>01</day><month>05</month><year>2026</year></date><date date-type="accepted"><day>03</day><month>05</month><year>2026</year></date></history><copyright-statement>&#x00A9; Leon Rozenblit, Steven Labkoff, Charles Safran. Originally published in the Journal of Medical Internet Research (<ext-link ext-link-type="uri" xlink:href="https://www.jmir.org">https://www.jmir.org</ext-link>), 5.6.2026. 
</copyright-statement><copyright-year>2026</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research (ISSN 1438-8871), is properly cited. The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://www.jmir.org/">https://www.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://www.jmir.org/2026/1/e97859"/><abstract><p>Maris et al document important ethical challenges at the intersection of electronic health record data and artificial intelligence development, but existing governance frameworks designed for secondary data use are categorically insufficient for artificial intelligence model training, which creates persistent deployable artifacts that encode local clinical patterns as generalizable knowledge. 
Drawing on two decades of stewardship framework development, we propose extending governance from data stewardship to model stewardship.</p></abstract><kwd-group><kwd>data stewardship</kwd><kwd>model stewardship</kwd><kwd>AI governance</kwd><kwd>electronic health records</kwd><kwd>decontextualization</kwd><kwd>secondary use</kwd><kwd>clinical tropism</kwd><kwd>health data ethics</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>Maris et al [<xref ref-type="bibr" rid="ref1">1</xref>] make an empirical contribution to the ethics of health data use for artificial intelligence (AI), grounding four cross-cutting themes (privacy, public trust, fair representation, and responsible integration) in stakeholder perspectives from the LEAPfROG project. Their identification of &#x201C;decontextualization&#x201D; as a central challenge deserves particular attention. We write from the vantage of two decades of work on stewardship frameworks for health data. The American Medical Informatics Association (AMIA) national framework for secondary use [<xref ref-type="bibr" rid="ref2">2</xref>], the National Committee on Vital and Health Statistics (NCVHS) stewardship report to the Department of Health and Human Services [<xref ref-type="bibr" rid="ref3">3</xref>], and the elaboration of data stewardship principles [<xref ref-type="bibr" rid="ref4">4</xref>] established core principles (accountability, chain of trust, transparency, data quality) that Maris et al&#x2019;s [<xref ref-type="bibr" rid="ref1">1</xref>] stakeholders independently rediscover. This convergence is validating but concerning: the principles hold, yet remain unoperationalized. 
We argue that AI model training represents a fundamentally new form of data use requiring a shift from data stewardship to model stewardship.</p></sec><sec id="s2"><title>AI Model Training Is Not Your Grandfather&#x2019;s Secondary Use</title><p>Nearly 20 years ago, a national expert panel defined the secondary use of health data as uses beyond direct patient care, including research, quality measurement, public health surveillance, and commercial applications [<xref ref-type="bibr" rid="ref2">2</xref>]. AI model training falls under this broad umbrella, but it differs from every use the framework&#x2019;s architects envisioned. Traditional secondary uses analyze data and produce bounded findings; AI training creates persistent, deployable artifacts: models that may be commercialized globally, influence clinical decisions at scale, and embed the assumptions of their training context into every future prediction. A research study produces conclusions bounded by its methods and sample; a model trained on the same data produces an artifact with unbounded downstream reach and no expiration date.</p><p>The NCVHS [<xref ref-type="bibr" rid="ref3">3</xref>] recommended abandoning &#x201C;secondary use&#x201D; as too imprecise for meaningful governance, advice that is even more apt today. AI training should be recognized as a qualitatively distinct category of secondary use, with stewardship requirements reflecting its unique characteristics: persistence, scalability, commercial deployment, and the encoding of institutional context as generalizable knowledge.</p></sec><sec id="s3"><title>The Challenge of Decontextualization</title><p>Maris et al [<xref ref-type="bibr" rid="ref1">1</xref>] identify decontextualization as a cross-cutting ethical concern. 
We argue it is more fundamental than their analysis suggests: not one challenge among several, but the mechanism through which the others arise.</p><p>Electronic health record data encode not only clinical facts but institutional workflows, documentation practices, coding conventions, billing incentives, and resource constraints. Van der Lei&#x2019;s [<xref ref-type="bibr" rid="ref5">5</xref>] first law of medical informatics, that data should be used only for the purpose for which they were collected, takes on new force when the reuse creates persistent, deployable artifacts rather than bounded research findings.</p><p>Two distinct challenges are at work. The first is data quality, and it is improvable: advances in ambient documentation, terminology that captures clinical intent, and better problem list governance will strengthen electronic health record data over time. The second is structural and persists regardless of data quality: every dataset carries the institutional fingerprint of its origin. What Maris et al [<xref ref-type="bibr" rid="ref1">1</xref>], following Alami et al [<xref ref-type="bibr" rid="ref6">6</xref>], term &#x201C;clinical tropism,&#x201D; the tendency of AI to reproduce narrow training environment practices, is a symptom of this structural layer. A model trained at an academic medical center with aggressive sepsis protocols learns different signals than one at a community hospital, not because the data are poor, but because they faithfully reflect different contexts (<xref ref-type="fig" rid="figure1">Figure 1</xref>).</p><p>Deploying models trained in one context across settings that differ systematically risks disadvantaging patients in predictable, preventable ways, the kind of harm that stewardship frameworks were designed to address.</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>Progressive decontextualization of clinical data. 
Panel A: a recognizable scene representing the rich context of clinical reality. Panel B: the same scene reduced to a grayscale grid of discrete tiles. Panel C: the tiles resorted by value, severing all spatial relationships&#x2014;a visual metaphor for how electronic health record data lose institutional context when extracted for model training. AI: artificial intelligence.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="jmir_v28i1e97859_fig01.png"/></fig></sec><sec id="s4"><title>From Data Stewardship to Model Stewardship</title><p>The AMIA and NCVHS frameworks established stewardship principles for health data: accountability, chain of trust, transparency, oversight, data quality, and individual participation [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref4">4</xref>]. These principles must now extend to AI models and the datasets used to train them.</p><p>Consider the chain of trust, a core NCVHS concept. When data flow from hospital to aggregator to AI company to commercial model to clinical deployment across institutions, with the training data never represented, the chain does not merely stretch; it breaks. Who is the steward of a model trained on data from five health systems and deployed in 50?</p><p>Recent work offers concrete starting points that are achievable with existing infrastructure. Multistakeholder governance frameworks [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref8">8</xref>] propose domain-specific approaches: clinical decision support, real-world evidence generation, and consumer health AI each require distinct governance structures. 
The Safe, Effective, Equitable, Trustworthy (SEET) framework provides organizing principles [<xref ref-type="bibr" rid="ref8">8</xref>], while recommendations for AI-enabled clinical decision support specify validation, certification, safety monitoring, adverse event reporting, and provenance documentation requirements [<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref10">10</xref>]. Real-world data governance standards, including metadata requirements and bias documentation [<xref ref-type="bibr" rid="ref9">9</xref>], offer complementary infrastructure.</p><p>Model stewardship, we propose, should encompass at minimum (1) training data provenance documentation, so downstream users know what populations and practice settings a model reflects; (2) cross-institutional validation before deployment beyond the training context; (3) ongoing monitoring for context drift as clinical practices evolve; and (4) accountability structures that follow the model through its life cycle, not merely the data at its origin. All these requirements are technically feasible: provenance documentation and validation protocols exist in other regulated domains. Do we have the will to mandate them?</p></sec><sec id="s5" sec-type="conclusions"><title>Conclusion</title><p>Maris et al [<xref ref-type="bibr" rid="ref1">1</xref>] are right that stakeholder-led governance is essential. But governance must evolve to match its target. AI models are not simply a new use of data; they are new artifacts with their own life cycle, risks, and accountability requirements. The stewardship frameworks built over two decades provide a proven foundation; extending them is the challenge. 
The immediate task is clear: require training data provenance and cross-institutional validation as preconditions for clinical AI deployment, just as we require evidence of efficacy before deploying therapeutics.</p></sec></body><back><ack><p>Generative artificial intelligence tools (Anthropic Claude, Opus 4.6 model) were used to assist with literature organization, outline structuring, prose drafting, and generating illustrative figures during the preparation of this commentary. All intellectual content, arguments, and conclusions are the authors&#x2019; own. The authors reviewed, edited, and take full responsibility for the final manuscript.</p></ack><notes><sec><title>Funding</title><p>The authors declared that no financial support was received for this work.</p></sec><sec><title>Data Availability</title><p>This commentary reports no original research data.</p></sec></notes><fn-group><fn fn-type="con"><p>LR conceived the commentary, developed the argument structure, and wrote the first draft. SL contributed to the historical stewardship framework analysis and codeveloped the thesis. CS reviewed the outline and draft, contributed to the data quality improvement perspective, and provided critical revisions. 
All authors reviewed and approved the final manuscript.</p></fn><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">AI</term><def><p>artificial intelligence</p></def></def-item><def-item><term id="abb2">AMIA</term><def><p>American Medical Informatics Association</p></def></def-item><def-item><term id="abb3">NCVHS</term><def><p>National Committee on Vital and Health Statistics</p></def></def-item><def-item><term id="abb4">SEET</term><def><p>Safe, Effective, Equitable, Trustworthy</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Maris</surname><given-names>MT</given-names> </name><name name-style="western"><surname>Klopotowska</surname><given-names>JE</given-names> </name><name name-style="western"><surname>Cornet</surname><given-names>R</given-names> </name><etal/></person-group><article-title>The ethics of leveraging routinely collected patient data for AI development: mixed methods study</article-title><source>J Med Internet Res</source><year>2026</year><month>03</month><day>2</day><volume>28</volume><fpage>e79863</fpage><pub-id pub-id-type="doi">10.2196/79863</pub-id><pub-id pub-id-type="medline">41814967</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Safran</surname><given-names>C</given-names> </name><name name-style="western"><surname>Bloomrosen</surname><given-names>M</given-names> </name><name name-style="western"><surname>Hammond</surname><given-names>WE</given-names> </name><etal/></person-group><article-title>Toward a national framework for the secondary use of health data: an American Medical Informatics Association White Paper</article-title><source>J Am Med Inform 
Assoc</source><year>2007</year><volume>14</volume><issue>1</issue><fpage>1</fpage><lpage>9</lpage><pub-id pub-id-type="doi">10.1197/jamia.M2273</pub-id><pub-id pub-id-type="medline">17077452</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="report"><person-group person-group-type="author"><collab>National Committee on Vital and Health Statistics</collab></person-group><article-title>Enhanced protections for uses of health data: a stewardship framework for &#x201C;secondary uses&#x201D; of electronically collected and transmitted health data</article-title><year>2007</year><access-date>2026-04-09</access-date><publisher-name>Department of Health &#x0026; Human Services</publisher-name><comment><ext-link ext-link-type="uri" xlink:href="https://ncvhs.hhs.gov/wp-content/uploads/2013/12/071221lt.pdf">https://ncvhs.hhs.gov/wp-content/uploads/2013/12/071221lt.pdf</ext-link></comment></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bloomrosen</surname><given-names>M</given-names> </name><name name-style="western"><surname>Detmer</surname><given-names>DE</given-names> </name></person-group><article-title>Advancing the framework: use of health data--a report of a working conference of the American Medical Informatics Association</article-title><source>J Am Med Inform Assoc</source><year>2008</year><volume>15</volume><issue>6</issue><fpage>715</fpage><lpage>722</lpage><pub-id pub-id-type="doi">10.1197/jamia.M2905</pub-id><pub-id pub-id-type="medline">18755988</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>van der Lei</surname><given-names>J</given-names> </name></person-group><article-title>Use and abuse of computer-stored medical records</article-title><source>Methods Inf 
Med</source><year>1991</year><month>04</month><volume>30</volume><issue>2</issue><fpage>79</fpage><lpage>80</lpage><pub-id pub-id-type="doi">10.1055/s-0038-1634831</pub-id><pub-id pub-id-type="medline">1857252</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Alami</surname><given-names>H</given-names> </name><name name-style="western"><surname>Lehoux</surname><given-names>P</given-names> </name><name name-style="western"><surname>Auclair</surname><given-names>Y</given-names> </name><etal/></person-group><article-title>Artificial intelligence and health technology assessment: anticipating a new level of complexity</article-title><source>J Med Internet Res</source><year>2020</year><month>07</month><day>7</day><volume>22</volume><issue>7</issue><fpage>e17707</fpage><pub-id pub-id-type="doi">10.2196/17707</pub-id><pub-id pub-id-type="medline">32406850</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Rozenblit</surname><given-names>L</given-names> </name><name name-style="western"><surname>Price</surname><given-names>A</given-names> </name><name name-style="western"><surname>Solomonides</surname><given-names>A</given-names> </name><etal/></person-group><article-title>Towards a multi-stakeholder process for developing responsible AI governance in consumer health</article-title><source>Int J Med Inform</source><year>2025</year><month>03</month><volume>195</volume><fpage>105713</fpage><pub-id pub-id-type="doi">10.1016/j.ijmedinf.2024.105713</pub-id><pub-id pub-id-type="medline">39642592</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Rozenblit</surname><given-names>L</given-names> </name><name 
name-style="western"><surname>Price</surname><given-names>A</given-names> </name><name name-style="western"><surname>Solomonides</surname><given-names>A</given-names> </name><etal/></person-group><article-title>Toward responsible AI governance: balancing multi-stakeholder perspectives on AI in healthcare</article-title><source>Int J Med Inform</source><year>2025</year><month>11</month><volume>203</volume><fpage>106015</fpage><pub-id pub-id-type="doi">10.1016/j.ijmedinf.2025.106015</pub-id><pub-id pub-id-type="medline">40680319</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Koski</surname><given-names>E</given-names> </name><name name-style="western"><surname>Das</surname><given-names>A</given-names> </name><name name-style="western"><surname>Hsueh</surname><given-names>PYS</given-names> </name><etal/></person-group><article-title>Towards responsible artificial intelligence in healthcare-getting real about real-world data and evidence</article-title><source>J Am Med Inform Assoc</source><year>2025</year><month>11</month><day>1</day><volume>32</volume><issue>11</issue><fpage>1746</fpage><lpage>1755</lpage><pub-id pub-id-type="doi">10.1093/jamia/ocaf133</pub-id><pub-id pub-id-type="medline">40999782</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Labkoff</surname><given-names>S</given-names> </name><name name-style="western"><surname>Oladimeji</surname><given-names>B</given-names> </name><name name-style="western"><surname>Kannry</surname><given-names>J</given-names> </name><etal/></person-group><article-title>Toward a responsible future: recommendations for AI-enabled clinical decision support</article-title><source>J Am Med Inform 
Assoc</source><year>2024</year><month>11</month><day>1</day><volume>31</volume><issue>11</issue><fpage>2730</fpage><lpage>2739</lpage><pub-id pub-id-type="doi">10.1093/jamia/ocae209</pub-id><pub-id pub-id-type="medline">39325508</pub-id></nlm-citation></ref></ref-list></back></article>