<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="letter"><front><journal-meta><journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id><journal-id journal-id-type="publisher-id">jmir</journal-id><journal-id journal-id-type="index">1</journal-id><journal-title>Journal of Medical Internet Research</journal-title><abbrev-journal-title>J Med Internet Res</abbrev-journal-title><issn pub-type="epub">1438-8871</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v27i1e73086</article-id><article-id pub-id-type="doi">10.2196/73086</article-id><article-categories><subj-group subj-group-type="heading"><subject>Research Letter</subject></subj-group></article-categories><title-group><article-title>Racial Misclassification of American Indian and Alaska Native People in the Electronic Medical Record: An Unexpected Hurdle in a Retrospective Medical Record Cohort Study</article-title></title-group><contrib-group><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Rusk</surname><given-names>Ann Marie</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Chamberlain</surname><given-names>Alanna M</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Felzer</surname><given-names>Jamie</given-names></name><degrees>MPH, MD</degrees><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author"><name 
name-style="western"><surname>Bui</surname><given-names>Yvonne</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Patten</surname><given-names>Christi A</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Destephano</surname><given-names>Christopher C</given-names></name><degrees>MPH, MD</degrees><xref ref-type="aff" rid="aff4">4</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Rank</surname><given-names>Matthew A</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Benzo</surname><given-names>Roberto P</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Kennedy</surname><given-names>Cassie C</given-names></name><degrees>MS, MD</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib></contrib-group><aff id="aff1"><institution>Mayo Clinic in Arizona</institution><addr-line>13400 E. 
Shea Blvd</addr-line><addr-line>Scottsdale</addr-line><addr-line>AZ</addr-line><country>United States</country></aff><aff id="aff2"><institution>Mayo Clinic</institution><addr-line>Rochester</addr-line><addr-line>MN</addr-line><country>United States</country></aff><aff id="aff3"><institution>Emory University</institution><addr-line>Atlanta</addr-line><addr-line>GA</addr-line><country>United States</country></aff><aff id="aff4"><institution>Mayo Clinic in Florida</institution><addr-line>Jacksonville</addr-line><addr-line>FL</addr-line><country>United States</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Sarvestan</surname><given-names>Javad</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Pacheco</surname><given-names>Joseph</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Nash</surname><given-names>Sarah H</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Ann Marie Rusk, MD, Mayo Clinic in Arizona, 13400 E. Shea Blvd, Scottsdale, AZ, 85259, United States; <email>rusk.ann@mayo.edu</email></corresp></author-notes><pub-date pub-type="collection"><year>2025</year></pub-date><pub-date pub-type="epub"><day>30</day><month>7</month><year>2025</year></pub-date><volume>27</volume><elocation-id>e73086</elocation-id><history><date date-type="received"><day>17</day><month>03</month><year>2025</year></date><date date-type="rev-recd"><day>10</day><month>06</month><year>2025</year></date><date date-type="accepted"><day>11</day><month>06</month><year>2025</year></date></history><copyright-statement>&#x00A9; Ann Marie Rusk, Alanna M Chamberlain, Jamie Felzer, Yvonne Bui, Christi A Patten, Christopher C Destephano, Matthew A Rank, Roberto P Benzo, Cassie C Kennedy. 
Originally published in the Journal of Medical Internet Research (<ext-link ext-link-type="uri" xlink:href="https://www.jmir.org">https://www.jmir.org</ext-link>), 30.7.2025. </copyright-statement><copyright-year>2025</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research (ISSN 1438-8871), is properly cited. The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://www.jmir.org/">https://www.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://www.jmir.org/2025/1/e73086"/><abstract><p>Electronic health record data represent a rich data source; however, data accuracy must be considered prior to reporting health outcomes among American Indian and Alaska Native people. Using a hybrid approach to harmonizing data from multiple sources represents a valid method of assessing data integrity in this population.</p></abstract><kwd-group><kwd>electronic health records</kwd><kwd>EHR</kwd><kwd>health care disparities</kwd><kwd>Indigenous health</kwd><kwd>American Indian or Alaska Native</kwd><kwd>social determinants of health</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>Indigenous North Americans (American Indian and Alaska Native [AI/AN] people) in the United States have the shortest life expectancy among all racial or ethnic groups [<xref ref-type="bibr" rid="ref1">1</xref>]. 
Disparate health and survival outcomes are influenced by social determinants of health (SDOHs)&#x2014;factors that influence birth, health, life, and death&#x2014;including health behaviors and systemic factors (eg, health care access) [<xref ref-type="bibr" rid="ref2">2</xref>]. Addressing health care disparities requires equitable representation in public health data. In a retrospective cohort study that examined longitudinal cigarette smoking behaviors of Indigenous people in Olmsted County, Minnesota&#x2014;a county without access to Indian Health Service clinics or hospitals&#x2014;the magnitude of racial misclassification in electronic health record (EHR) data became an unexpected hurdle for the study team [<xref ref-type="bibr" rid="ref3">3</xref>]. Most AI/AN people reside in urban areas or off reservation lands [<xref ref-type="bibr" rid="ref4">4</xref>]. Understanding this population&#x2019;s health behaviors is critical to informing interventions. Herein, we describe methods for harmonizing race data from multiple record sources to ensure the accurate representation of this frequently underrepresented and mischaracterized population.</p></sec><sec id="s2" sec-type="methods"><title>Methods</title><p>Individuals with vital records (birth or death certificate) or EHR data (provider histories, EHR flowsheets, self-reports, or nursing documentation) indicating AI/AN race were identified in a longitudinal cohort study (2006&#x2010;2019) to assess smoking behaviors and pharmaceutical cessation aid uptake by race, sex, age, and indexed SDOHs [<xref ref-type="bibr" rid="ref3">3</xref>]. Inclusion criteria were AI/AN race and availability of &#x2265;1 year of smoking data. Exclusion criteria included non-AI/AN race and no smoking data available from 2006 to 2019. 
Patients were identified in the Rochester Epidemiology Project&#x2014;a medical-record linkage system (established in 1966) inclusive of multiple health care delivery systems and population data for 99.9% of Olmsted County residents [<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref6">6</xref>]. AMR, JF, and YB conducted data cleaning for all available records to resolve discordant records of AI/AN race; this included manual review of narrative EHR data, exclusion of individuals who used foreign language translation services, and review of patients&#x2019; vital records and records of parents and offspring. A sex- and age-matched (&#x00B1;5 years) non-AI/AN cohort was compared to the study cohort.</p></sec><sec id="s3" sec-type="results"><title>Results</title><p>In total, 1271 individuals with &#x2265;1 record indicating AI/AN race were identified; 148 were excluded (missing smoking data: n=124; no 2006-2019 EHR data: n=24). Manual review of the AI/AN cohort&#x2019;s race and ethnicity data revealed 25 individuals who reported immigration from a non&#x2013;North American country, and 200 individuals required foreign language interpreters for languages originating outside of North America (primarily languages originating from the Indian subcontinent and Southeast Asia). Final data cleaning resulted in a cohort of 898 AI/AN patients, demonstrating 17.7% (225/1271) racial misclassification [<xref ref-type="bibr" rid="ref3">3</xref>] (<xref ref-type="fig" rid="figure1">Figure 1</xref>). The annual smoking prevalence for race-misclassified individuals (n=225) ranged between 8% and 23%; that for the AI/AN cohort (n=898) ranged between 39% and 47% (<xref ref-type="fig" rid="figure2">Figure 2</xref>). 
The matched cohort included 1780 individuals (White: n=1483, 83.3%; Black/African American: n=105, 5.9%; Asian: n=96, 5.4%; Hawaiian/Pacific Islander: n=4, 0.2%; other: n=68, 3.8%; declined: n=8, 0.4%; unknown: n=16, 0.9%) [<xref ref-type="bibr" rid="ref3">3</xref>].</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>Inclusion and exclusion criteria applied to identify a cohort of American Indian or Alaska Native individuals in the Rochester Epidemiology Project from 2006 to 2019.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="jmir_v27i1e73086_fig01.png"/></fig><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>Annual smoking prevalence.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="jmir_v27i1e73086_fig02.png"/></fig></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><p>Harmonization of vital records and multiple EHR sources proved essential, as the magnitude of race misclassification (17.7%) in this study was higher than that in other AI/AN population studies, including a review of mortality data among AI/AN people in Washington State (12%) [<xref ref-type="bibr" rid="ref7">7</xref>]. Without cohort validation, this study&#x2019;s smoking prevalence would have been falsely low due to the lower smoking prevalence among race-misclassified individuals. Smoking behavior misrepresentation in medical literature would further exacerbate health care disparities in this underrepresented population. The Centers for Medicare &#x0026; Medicaid Services have recognized the need to standardize data entry, releasing resources for health care organizations to improve demographic accuracy [<xref ref-type="bibr" rid="ref8">8</xref>]. Until standardized data entry is implemented, additional methods for validating historical race data are necessary [<xref ref-type="bibr" rid="ref9">9</xref>]. 
Data linkage&#x2014;the harmonization of an individual&#x2019;s data across different sources&#x2014;represents a valid methodology [<xref ref-type="bibr" rid="ref10">10</xref>]. Using a hybrid approach to AI/AN cohort validation&#x2014;manual review of narrative documentation, vital records, and EHR input across multiple health systems&#x2014;represents a potential method for smaller epidemiological studies. This study&#x2019;s limitations included the inability to link data with tribal registries or Indian Health Service data and the time required to manually review records. Our methods may be used for counties where AI/AN individuals lack access to tribal health facilities (eg, Olmsted County). Besides manual data review, studies including AI/AN people should be conducted in concert with AI/AN people and tribes. This study was designed and conducted with oversight by an AI/AN community advisory board that expressed the critical importance of accurate race data. Studies using EHR data inclusive of AI/AN people should include measures for ensuring accurate race data and representation.</p></sec></body><back><ack><p>The content of this article is the sole responsibility of the authors and does not represent the views of the National Institutes of Health, Mayo Clinic, or the Robert D. and Patricia E. Kern Center for the Science of Health Care Delivery. The authors thank the Healthy Nations Advisory Board for their input and oversight of this study. This study was supported by funding from the Robert D. and Patricia E. Kern Center for the Science of Health Care Delivery, the Rochester Epidemiology Project (REP) Scholarship, the Robert A. Winn Career Development Award, and the American Thoracic Society Fellowship in Health Equity and Diversity. The REP medical-record linkage system is supported by the National Institute on Aging (NIA, AG 058738), the Mayo Clinic Research Committee, and fees paid annually by REP users. 
There was no use of generative artificial intelligence for any version of drafts, outlines, writing, text, or figures in this paper.</p></ack><fn-group><fn fn-type="con"><p>Conceptualization: AMR, AMC, CAP, CCK</p><p>Data curation: AMR, AMC</p><p>Formal analysis: AMR, JF, YB</p><p>Funding acquisition: AMR</p><p>Investigation: AMR, CCK</p><p>Methodology: AMR, CCK</p><p>Project administration: AMR</p><p>Resources: AMR, CCK</p><p>Software: CCK</p><p>Supervision: AMC, CAP, MAR, RPB, CCK</p><p>Validation: AMR, CCK</p><p>Visualization: AMR</p><p>Writing &#x2013; original draft: AMR</p><p>Writing &#x2013; review &#x0026; editing: AMR, AMC, CAP, CCD, MAR, RPB, CCK</p></fn><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">AI/AN</term><def><p>American Indian and Alaska Native</p></def></def-item><def-item><term id="abb2">EHR</term><def><p>electronic health record</p></def></def-item><def-item><term id="abb3">SDOH</term><def><p>social determinant of health</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Arias</surname><given-names>E</given-names> </name><name name-style="western"><surname>Xu</surname><given-names>J</given-names> </name><name name-style="western"><surname>Kochanek</surname><given-names>K</given-names> </name></person-group><article-title>United States life tables, 2021</article-title><source>Natl Vital Stat Rep</source><year>2023</year><month>11</month><volume>72</volume><issue>12</issue><fpage>1</fpage><lpage>64</lpage><pub-id pub-id-type="medline">38048433</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Alvidrez</surname><given-names>J</given-names> </name><name 
name-style="western"><surname>Castille</surname><given-names>D</given-names> </name><name name-style="western"><surname>Laude-Sharp</surname><given-names>M</given-names> </name><name name-style="western"><surname>Rosario</surname><given-names>A</given-names> </name><name name-style="western"><surname>Tabor</surname><given-names>D</given-names> </name></person-group><article-title>The National Institute on Minority Health and Health Disparities research framework</article-title><source>Am J Public Health</source><year>2019</year><month>01</month><volume>109</volume><issue>S1</issue><fpage>S16</fpage><lpage>S20</lpage><pub-id pub-id-type="doi">10.2105/AJPH.2018.304883</pub-id><pub-id pub-id-type="medline">30699025</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Rusk</surname><given-names>AM</given-names> </name><name name-style="western"><surname>Giblon</surname><given-names>RE</given-names> </name><name name-style="western"><surname>Chamberlain</surname><given-names>AM</given-names> </name><etal/></person-group><article-title>Indigenous smoking behaviors in Olmsted County, Minnesota: a longitudinal population-based study</article-title><source>Mayo Clin Proc</source><year>2022</year><month>10</month><volume>97</volume><issue>10</issue><fpage>1836</fpage><lpage>1848</lpage><pub-id pub-id-type="doi">10.1016/j.mayocp.2022.03.019</pub-id><pub-id pub-id-type="medline">36202495</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="web"><article-title>Public Use Microdata Sample</article-title><source>United States Census Bureau</source><year>2019</year><access-date>2025-06-06</access-date><comment><ext-link ext-link-type="uri" 
xlink:href="https://www.census.gov/programs-surveys/acs/microdata/access.html">https://www.census.gov/programs-surveys/acs/microdata/access.html</ext-link></comment></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>St Sauver</surname><given-names>JL</given-names> </name><name name-style="western"><surname>Grossardt</surname><given-names>BR</given-names> </name><name name-style="western"><surname>Yawn</surname><given-names>BP</given-names> </name><name name-style="western"><surname>Melton</surname><given-names>LJ</given-names>  <suffix>3rd</suffix></name><name name-style="western"><surname>Rocca</surname><given-names>WA</given-names> </name></person-group><article-title>Use of a medical records linkage system to enumerate a dynamic population over time: the Rochester epidemiology project</article-title><source>Am J Epidemiol</source><year>2011</year><month>05</month><day>1</day><volume>173</volume><issue>9</issue><fpage>1059</fpage><lpage>1068</lpage><pub-id pub-id-type="doi">10.1093/aje/kwq482</pub-id><pub-id pub-id-type="medline">21430193</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>St Sauver</surname><given-names>JL</given-names> </name><name name-style="western"><surname>Grossardt</surname><given-names>BR</given-names> </name><name name-style="western"><surname>Leibson</surname><given-names>CL</given-names> </name><name name-style="western"><surname>Yawn</surname><given-names>BP</given-names> </name><name name-style="western"><surname>Melton</surname><given-names>LJ</given-names>  <suffix>3rd</suffix></name><name name-style="western"><surname>Rocca</surname><given-names>WA</given-names> </name></person-group><article-title>Generalizability of epidemiological findings and public health decisions: an illustration from the Rochester 
Epidemiology Project</article-title><source>Mayo Clin Proc</source><year>2012</year><month>02</month><volume>87</volume><issue>2</issue><fpage>151</fpage><lpage>160</lpage><pub-id pub-id-type="doi">10.1016/j.mayocp.2011.11.009</pub-id><pub-id pub-id-type="medline">22305027</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="web"><person-group person-group-type="author"><name name-style="western"><surname>Dankovchik</surname><given-names>J</given-names> </name><name name-style="western"><surname>Hoopes</surname><given-names>M</given-names> </name><name name-style="western"><surname>Nordstrom</surname><given-names>DL</given-names> </name><name name-style="western"><surname>Knaster</surname><given-names>E</given-names> </name></person-group><article-title>Racial misclassification and disparities in mortality among AI/AN and other races, Washington</article-title><source>Agency for Healthcare Research and Quality</source><year>2012</year><access-date>2024-12-30</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://hcup-us.ahrq.gov/datainnovations/raceethnicitytoolkit/or26.jsp">https://hcup-us.ahrq.gov/datainnovations/raceethnicitytoolkit/or26.jsp</ext-link></comment></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="web"><article-title>Inventory of resources for standardized demographic and language collection</article-title><source>Centers for Medicare &#x0026; Medicaid Services</source><year>2024</year><month>03</month><access-date>2024-05-06</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.cms.gov/about-cms/agency-information/omh/downloads/data-collection-resources.pdf">https://www.cms.gov/about-cms/agency-information/omh/downloads/data-collection-resources.pdf</ext-link></comment></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Martinez</surname><given-names>RAM</given-names> </name><name name-style="western"><surname>Andrabi</surname><given-names>N</given-names> </name><name name-style="western"><surname>Goodwin</surname><given-names>AN</given-names> </name><name name-style="western"><surname>Wilbur</surname><given-names>RE</given-names> </name><name name-style="western"><surname>Smith</surname><given-names>NR</given-names> </name><name name-style="western"><surname>Zivich</surname><given-names>PN</given-names> </name></person-group><article-title>Conceptualization, operationalization, and utilization of race and ethnicity in major epidemiology journals, 1995-2018: a systematic review</article-title><source>Am J Epidemiol</source><year>2023</year><month>02</month><day>24</day><volume>192</volume><issue>3</issue><fpage>483</fpage><lpage>496</lpage><pub-id pub-id-type="doi">10.1093/aje/kwac146</pub-id><pub-id pub-id-type="medline">35938872</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Chin</surname><given-names>MK</given-names> </name><name name-style="western"><surname>&#x0110;o&#x00E0;n</surname><given-names>LN</given-names> </name><name name-style="western"><surname>Russo</surname><given-names>RG</given-names> </name><etal/></person-group><article-title>Methods for retrospectively improving race/ethnicity data quality: a scoping review</article-title><source>Epidemiol Rev</source><year>2023</year><month>12</month><day>20</day><volume>45</volume><issue>1</issue><fpage>127</fpage><lpage>139</lpage><pub-id pub-id-type="doi">10.1093/epirev/mxad002</pub-id><pub-id pub-id-type="medline">37045807</pub-id></nlm-citation></ref></ref-list></back></article>