<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id><journal-id journal-id-type="publisher-id">jmir</journal-id><journal-id journal-id-type="index">1</journal-id><journal-title>Journal of Medical Internet Research</journal-title><abbrev-journal-title>J Med Internet Res</abbrev-journal-title><issn pub-type="epub">1438-8871</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v28i1e90482</article-id><article-id pub-id-type="doi">10.2196/90482</article-id><article-categories><subj-group subj-group-type="heading"><subject>Original Paper</subject></subj-group></article-categories><title-group><article-title>Measuring the Quality of Datasets: Development of the IDEFIM Indicator Set for Empirical Health Research</article-title></title-group><contrib-group><contrib contrib-type="author"><name name-style="western"><surname>Harkener</surname><given-names>Sonja</given-names></name><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Bott</surname><given-names>Oliver J</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Draeger</surname><given-names>Christian</given-names></name><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Hartz</surname><given-names>Tobias</given-names></name><xref ref-type="aff" rid="aff4">4</xref></contrib><contrib contrib-type="author"><name 
name-style="western"><surname>Jenetzky</surname><given-names>Ekkehart</given-names></name><degrees>MD, PhD</degrees><xref ref-type="aff" rid="aff5">5</xref><xref ref-type="aff" rid="aff6">6</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>L&#x00F6;be</surname><given-names>Matthias</given-names></name><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>March</surname><given-names>Stefanie</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff7">7</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Schubert</surname><given-names>Chris</given-names></name><xref ref-type="aff" rid="aff8">8</xref></contrib><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Stausberg</surname><given-names>J&#x00FC;rgen</given-names></name><degrees>MD, PhD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib></contrib-group><aff id="aff1"><institution>Institute for Medical Informatics, Biometry and Epidemiology, Faculty of Medicine, University of Duisburg-Essen</institution><addr-line>Hufelandstr. 
55</addr-line><addr-line>Essen</addr-line><addr-line>North Rhine-Westphalia</addr-line><country>Germany</country></aff><aff id="aff2"><institution>Institute for Applied Data Science Hannover (DATA|H), Hochschule Hannover - University of Applied Sciences and Arts</institution><addr-line>Hannover</addr-line><addr-line>Lower Saxony</addr-line><country>Germany</country></aff><aff id="aff3"><institution>Institute for Medical Informatics, Statistics and Epidemiology, University of Leipzig</institution><addr-line>Leipzig</addr-line><addr-line>Saxony</addr-line><country>Germany</country></aff><aff id="aff4"><institution>Clinical Cancer Registry Lower Saxony</institution><addr-line>Hannover</addr-line><country>Germany</country></aff><aff id="aff5"><institution>Faculty of Health/School of Medicine, Witten/Herdecke University</institution><addr-line>Witten</addr-line><addr-line>North Rhine-Westphalia</addr-line><country>Germany</country></aff><aff id="aff6"><institution>University Medical Center of the Johannes Gutenberg University Mainz</institution><addr-line>Mainz</addr-line><addr-line>Rhineland-Palatinate</addr-line><country>Germany</country></aff><aff id="aff7"><institution>Magdeburg-Stendal University of Applied Sciences</institution><addr-line>Magdeburg</addr-line><addr-line>Saxony-Anhalt</addr-line><country>Germany</country></aff><aff id="aff8"><institution>Library, TU Wien</institution><addr-line>Vienna</addr-line><country>Austria</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Coristine</surname><given-names>Andrew</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Sagi</surname><given-names>Tomer</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Xu</surname><given-names>Wei</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to J&#x00FC;rgen Stausberg, MD, 
PhD, Institute for Medical Informatics, Biometry and Epidemiology, Faculty of Medicine, University of Duisburg-Essen, Hufelandstr. 55, Essen, North Rhine-Westphalia, 45122, Germany, 49 201 72377201; <email>stausberg@ekmed.de</email></corresp></author-notes><pub-date pub-type="collection"><year>2026</year></pub-date><pub-date pub-type="epub"><day>17</day><month>6</month><year>2026</year></pub-date><volume>28</volume><elocation-id>e90482</elocation-id><history><date date-type="received"><day>29</day><month>12</month><year>2025</year></date><date date-type="rev-recd"><day>13</day><month>05</month><year>2026</year></date><date date-type="accepted"><day>15</day><month>05</month><year>2026</year></date></history><copyright-statement>&#x00A9; Sonja Harkener, Oliver J Bott, Christian Draeger, Tobias Hartz, Ekkehart Jenetzky, Matthias L&#x00F6;be, Stefanie March, Chris Schubert, J&#x00FC;rgen Stausberg. Originally published in the Journal of Medical Internet Research (<ext-link ext-link-type="uri" xlink:href="https://www.jmir.org">https://www.jmir.org</ext-link>), 17.6.2026. </copyright-statement><copyright-year>2026</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research (ISSN 1438-8871), is properly cited. 
The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://www.jmir.org/">https://www.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://www.jmir.org/2026/1/e90482"/><abstract><sec><title>Background</title><p>To be beneficial for empirical health research, a dataset must be fit for use. The quality of a dataset can only be influenced during data collection, yet it is evaluated multiple times during analysis or secondary use by applying quality indicators.</p></sec><sec><title>Objective</title><p>This study aimed to establish an up-to-date set of indicators measuring the quality of datasets in empirical health research.</p></sec><sec sec-type="methods"><title>Methods</title><p>A total of 3 pillars were combined. First, the 51 indicators of a German guideline from 2014 about the management of data quality were revised. Second, a literature review was performed looking for evidence sources since 2013 that describe, propose, or apply dataset quality indicators. Third, indicators were supplemented by a manual search and other sources. The quality indicators were then integrated into the IDEFIM framework. The IDEFIM framework distinguishes between the categories data, metadata, context, and openness quality. In this work, only the categories data and metadata quality, with their 14 dimensions, were considered.</p></sec><sec sec-type="results"><title>Results</title><p>In total, 69 indicators qualified for the IDEFIM indicator set, 53 related to the category data quality, and 16 to the category metadata quality. A total of 30 indicators originated from the German guideline, and 31 from the literature review. Three indicators were added to cover aspects of diversity, equity, and inclusion, and an additional 5 related to specifics of data and metadata quality not addressed so far. 
Most indicators were found in the dimensions accuracy (data) with 12 measures, completeness (data) with 12 measures, and consistency (data) with 19 measures. According to the number of supporting evidence sources, missing values in data elements (48 evidence sources), contradictions (31), and currentness (26) were the most popular quality indicators. Metadata quality was significantly less frequently addressed.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>The presented IDEFIM indicator set can be used for the management of data collections as well as for the verification of a dataset&#x2019;s quality for an intended use. The indicator set should also be considered in the design of a study in empirical health research and the development of software tools supporting the visualization of issues related to the quality of a dataset.</p></sec></abstract><kwd-group><kwd>dataset</kwd><kwd>data quality</kwd><kwd>empirical health research</kwd><kwd>health care</kwd><kwd>metadata</kwd><kwd>quality indicator</kwd><kwd>secondary use</kwd><kwd>IDEFIM framework</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>The goal of having data that is fit for use accompanies the entire life cycle of empirical health research (<xref ref-type="fig" rid="figure1">Figure 1</xref>). It begins with the development of a data collection following a systematic approach as a study [<xref ref-type="bibr" rid="ref1">1</xref>] or registry protocol [<xref ref-type="bibr" rid="ref2">2</xref>]. Ideally, predefined research questions guide the determination of populations, samples, visits, and variables, to name a few aspects [<xref ref-type="bibr" rid="ref3">3</xref>]. 
It is worthwhile to consider study-specific requirements for characteristics of a dataset, such as case completeness, data completeness, and correctness, already in the development phase, because the design - as well as the available resources - will substantially contribute to fulfilling these requirements. Many recommendations have been made for appropriate designs focusing on different types of empirical health research, for example, CONSORT (Consolidated Standards of Reporting Trials) for randomized trials [<xref ref-type="bibr" rid="ref4">4</xref>] or STROBE (Strengthening the Reporting of Observational Studies in Epidemiology) for observational research [<xref ref-type="bibr" rid="ref5">5</xref>].</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>Life cycle of datasets in empirical health research from the development phase to their linkage and secondary use.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="jmir_v28i1e90482_fig01.png"/></fig><p>Project-specific requirements regarding the quality of a dataset can be measured, evaluated, and benchmarked against individual or common thresholds. During data collection, the plan-do-check-act cycle (PDCA cycle) described by Deming [<xref ref-type="bibr" rid="ref6">6</xref>] can be applied to detect weak points and to implement actions for improvement. The data quality standards of the International Organization for Standardization (ISO) are based upon the approach of active quality management that was brought into practice in the 1950s by Americans such as Crosby [<xref ref-type="bibr" rid="ref7">7</xref>] and Juran [<xref ref-type="bibr" rid="ref8">8</xref>]. ISO 8000 defines data quality as the degree to which a set of inherent characteristics of data fulfills requirements [<xref ref-type="bibr" rid="ref9">9</xref>]. 
The degree is something measurable that could be quantified using a &#x201C;data quality measure&#x201D; introduced in ISO/International Electrotechnical Commission (IEC) 25024 [<xref ref-type="bibr" rid="ref10">10</xref>]. However, even within ISO standards, there are different definitions of the term &#x201C;data quality,&#x201D; each of them reasonable in the respective subject areas. Returning to the PDCA cycle, the measures about the quality of a dataset support the check part within the PDCA cycle, and the consequences determine the act part of the cycle regarding the management and operation of a data collection.</p><p>In empirical health research, data collection is usually followed by an analysis phase [<xref ref-type="bibr" rid="ref11">11</xref>]. Once data collection is complete (with the exception of interim analyses), the design of the data collection can no longer be changed. The same is true for secondary analyses of data, for example, from electronic health records [<xref ref-type="bibr" rid="ref12">12</xref>]. Measuring quality is important in the analysis phase to assess whether a dataset is fit for the intended use or for a secondary use. However, reviewing a dataset related to its design, its documentation, its availability, and similar aspects may also be useful. On the one hand, reviews supplement the measurement of characteristics with additional information, such as the FAIRness of a dataset [<xref ref-type="bibr" rid="ref13">13</xref>]. On the other hand, reviews may be less demanding because access to a dataset is not necessary to evaluate its fitness for use. This advantage of reviews compared to complex measurements could motivate the data quality and utility label proposed for the secondary use of data in the European Health Data Space (EHDS) [<xref ref-type="bibr" rid="ref14">14</xref>]. 
However, simply documenting information on data quality does not indicate the specific degree of quality needed, for example, to assess the appropriateness of a dataset with regard to different purposes of empirical health research, such as effectiveness evaluation as the most prominent purpose or hypothesis generation as a basic purpose [<xref ref-type="bibr" rid="ref15">15</xref>].</p><p>New challenges concerning data quality arise from using datasets to generate large language models [<xref ref-type="bibr" rid="ref16">16</xref>], from the availability of large and uncontrolled volumes of data (&#x201C;big data&#x201D;) [<xref ref-type="bibr" rid="ref17">17</xref>], and from linking datasets either as part of an active data collection or for analysis work [<xref ref-type="bibr" rid="ref18">18</xref>]. The associated risk of discrimination against vulnerable groups [<xref ref-type="bibr" rid="ref19">19</xref>] demands a strong consideration of data quality issues.</p><p>IDEFIM aims to support the collection and analysis phases of empirical health research by offering an up-to-date list of measures that assess a dataset&#x2019;s quality. A dataset was defined as a logically meaningful grouping of data [<xref ref-type="bibr" rid="ref20">20</xref>] and as an &#x201C;identifiable collection of data available for access or download in one or more formats&#x201D; [<xref ref-type="bibr" rid="ref21">21</xref>]. The terminology in the field of data quality was diverse and inconsistent. Therefore, IDEFIM kept the term &#x201C;quality indicator&#x201D; from previous work [<xref ref-type="bibr" rid="ref22">22</xref>] as a synonym for &#x201C;quality measure&#x201D; [<xref ref-type="bibr" rid="ref23">23</xref>]. Quality indicators measure the degree to which requirements concerning characteristics of datasets are met. 
IDEFIM assumed that the degree to which a dataset possesses a particular characteristic can be quantified by several quality indicators, each focusing on a different aspect of that characteristic. The goal of IDEFIM is to establish a comprehensive and consistent set of quality indicators for empirical health research, embedded in a common framework addressing a dataset&#x2019;s fitness for purpose and fitness for use. In previous publications, we presented preparatory and supporting work, including our motivation [<xref ref-type="bibr" rid="ref24">24</xref>], our framework covering dimensions and categories [<xref ref-type="bibr" rid="ref25">25</xref>], and a structure for specifying a dataset quality indicator [<xref ref-type="bibr" rid="ref26">26</xref>]. Here, we introduce the IDEFIM indicator set for empirical health research.</p></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>IDEFIM Framework</title><p>The IDEFIM framework consists of 4 categories with 21 dimensions (<xref ref-type="fig" rid="figure2">Figure 2</xref> [<xref ref-type="bibr" rid="ref25">25</xref>]), 8 dimensions in category data quality, 6 in category metadata quality, 3 in category context quality, and 4 in category openness quality. In accordance with ISO 8000, quality indicators measure the degree to which general requirements for characteristics, that is, dimensions, are met. This degree can be used to evaluate the fitness of a dataset for different purposes of empirical health research, such as health services research, quality research, or drug approval research. To evaluate a dataset&#x2019;s fitness for use in a particular project, quality indicator instances are applied to adjust the quality indicators to the particular setting, such as the recorded data elements. Quality checks represent the algorithms applied to the data. IDEFIM focused on quality indicators and a respective indicator set. 
However, quality indicators seem not to be appropriate with regard to the categories of context and openness quality. Respective approaches, such as the FAIR Guiding Principles [<xref ref-type="bibr" rid="ref13">13</xref>] and the 5-star open data approach [<xref ref-type="bibr" rid="ref27">27</xref>], propose Boolean conditions regarding structures (eg, the existence of an open license as a 1-star prerequisite) and processes (eg, an authentication and authorization procedure to access data as one FAIR Guiding Principle). Boolean conditions are not suitable for calculating measures. Consequently, IDEFIM examined quality indicators only with regard to the categories data and metadata quality.</p><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>IDEFIM framework with five layers supporting the goal to achieve datasets that are fit for purpose and fit for use.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="jmir_v28i1e90482_fig02.png"/></fig></sec><sec id="s2-2"><title>Material</title><sec id="s2-2-1"><title>Overview</title><p>The development of the IDEFIM indicator set was based on 3 pillars. First, it builds on national efforts to create a guideline for an adaptive management of data quality in cohort studies and registries [<xref ref-type="bibr" rid="ref28">28</xref>,<xref ref-type="bibr" rid="ref29">29</xref>]. Particularly, IDEFIM used indicators published in 2014 under the auspices of the Technology and Methods Platform for Networked Medical Research (TMF), with the second version of this guideline. These TMF indicators were developed through a systematic process over the years based on literature reviews, expert consultations, and community involvement [<xref ref-type="bibr" rid="ref22">22</xref>]. 
Second, a broad literature review was conducted in the field of data quality in empirical health research that included the search for evidence sources proposing, describing, or using indicators [<xref ref-type="bibr" rid="ref24">24</xref>]. This review expanded similar work from the TMF guidelines by searching for publications from 2013 onwards until January 2024. Third, the project identified additional indicators through empirical knowledge of the core project team, manual search looking at relevant journals and conference proceedings, snowball sampling starting with records retrieved during the literature search, and consultations of invited experts in 2 workshops. The first face-to-face workshop with 8 experts took place in November 2024, focusing on the framework. The second face-to-face workshop with 7 experts took place in August 2025, focusing on a proposal from the project concerning quality indicators. The workshops were strictly advisory. The core research team made the final decision to include or reject an indicator. The combination of the 3 pillars resulted in a first draft of the set, which explicitly relates each indicator to its origin. This draft was condensed into the final set described here. Each indicator in the final set was described using a uniform structure [<xref ref-type="bibr" rid="ref26">26</xref>].</p></sec><sec id="s2-2-2"><title>TMF Indicators</title><p>The guideline contains 51 indicators organized into 3 categories: integrity, organization, and trueness. These indicators are defined in a structured way using 14 items such as sources, calculation, and interpretation. As IDEFIM was concerned with empirical research projects on a general level, 10 of these indicators were excluded because they were either cancer registry specific or concerned measurements and their conditions that particularly occur in cohort studies (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). 
Thus, 41 indicators remained for IDEFIM from the TMF guideline. Each indicator was then assigned to exactly 1 dimension. Names and definitions of the indicators were revised and extended as necessary to create a consistent and up-to-date set of quality indicators.</p></sec><sec id="s2-2-3"><title>Literature Search</title><p>The literature search included Medline, the Cochrane Library, the Web of Science, and Scopus [<xref ref-type="bibr" rid="ref24">24</xref>]. It was conducted on February 2, 2024, for Medline and on January 16, 2024, for the other 3 information sources. The search looked at English or German records published since January 1, 2013. The search criteria were tailored to the individual options offered by the 4 information sources (<xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>). The results were merged, and duplicates were eliminated, resulting in 2748 records.</p><p>Due to the large volume of records, the screening based on titles and abstracts was split into 2 stages. A prescreening focused on data quality as a topic, the relationship to empirical health research, and general relevance. The records were divided equally between 2 reviewers. The prescreening took place between February 9, 2024, and March 6, 2024. The remaining 734 records were evaluated independently by 2 reviewers (SH and JS) between March 12, 2024, and April 8, 2024. The relationship of a record to the topic of data quality could be further differentiated into the following aspects: the structure for the description of data quality indicators, indicators of data quality, dimensions of data, gender equality and diversity in relation to data quality, and other references to data quality. An optional comment could be entered for each search result. In case of disagreement between the reviewers, a consensus was reached, resulting in 221 records.</p><p>Out of the 221 search results, 219 were obtained in full text. 
Here as well, due to the unexpectedly high number of remaining records, the search results were randomly divided in half and evaluated using the following criteria: type of scientific project (eg, analysis of data quality), reference to empirical research projects in medicine (eg, registry), reference to data quality (eg, indicators for data quality), and recommendation for further consideration. This left 117 sources of evidence for the review. The PRISMA (Preferred Reporting Items for Systematic Reviews and Meta-Analyses) flow diagram is presented in <xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref>. <xref ref-type="table" rid="table1">Table 1</xref> shows the distribution of the 117 sources with regard to aspects of data quality.</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Distribution of the 117 evidence sources with regard to aspects of data quality (multiple options possible).</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Category</td><td align="left" valign="bottom">Frequency</td></tr></thead><tbody><tr><td align="left" valign="top">Dimensions of data</td><td align="left" valign="top">101</td></tr><tr><td align="left" valign="top">Quality indicators</td><td align="left" valign="top">79</td></tr><tr><td align="left" valign="top">Standards of data quality</td><td align="left" valign="top">28</td></tr><tr><td align="left" valign="top">Special statistical procedures</td><td align="left" valign="top">21</td></tr><tr><td align="left" valign="top">Overall score for data quality</td><td align="left" valign="top">19</td></tr><tr><td align="left" valign="top">Interventions to improve data quality</td><td align="left" valign="top">18</td></tr><tr><td align="left" valign="top">Quality checks</td><td align="left" valign="top">15</td></tr><tr><td align="left" valign="top">Structure of quality indicators</td><td align="left" 
valign="top">9</td></tr><tr><td align="left" valign="top">Quality of metadata and paradata</td><td align="left" valign="top">6</td></tr><tr><td align="left" valign="top">Gender equality and diversity with reference to data quality</td><td align="left" valign="top">4</td></tr><tr><td align="left" valign="top">Other reference to data quality</td><td align="left" valign="top">28</td></tr></tbody></table></table-wrap></sec><sec id="s2-2-4"><title>Literature Review</title><p>The literature search yielded 117 relevant evidence sources, out of which 79 specifically related to data quality indicators. Each evidence source was examined by one reviewer (SH) for possible indicators, indicator instances, and quality checks. Due to the diverse and inconsistent terminology of measures, indicator instances and quality checks were also considered at this stage. Respective findings were recorded for each evidence source. However, some sources yielded no findings. In these cases, the entire source was recorded as a single finding. This resulted in a total of 622 findings, ie, candidates for quality indicators, from 79 evidence sources. A total of 2 reviewers (JS and SH) assessed each finding independently. If a finding corresponded to one of the 41 considered TMF indicators, it was marked accordingly. If no assignment made sense, either a &#x201C;new indicator,&#x201D; &#x201C;not applicable,&#x201D; or &#x201C;undecidable&#x201D; flag could be set. Proposals for new indicators were mainly considered in relation to the categories of data and metadata quality. It was assumed that requirements concerning context and openness quality are better defined using Boolean conditions than measures. <xref ref-type="table" rid="table2">Table 2</xref> shows the results of this stage. 
The reviewers agreed on 265 findings, and they disagreed on 357 findings.</p><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Initial evaluation of quality indicator findings by two reviewers (JS and SH).</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Option</td><td align="left" valign="bottom">Frequency</td></tr></thead><tbody><tr><td align="left" valign="top">Both reviewers performed a consistent mapping to a TMF<sup><xref ref-type="table-fn" rid="table2fn1">a</xref></sup> indicator</td><td align="left" valign="top">174</td></tr><tr><td align="left" valign="top">Both reviewers performed a mapping to a TMF indicator, but it was different</td><td align="left" valign="top">131</td></tr><tr><td align="left" valign="top">One reviewer performed a mapping to a TMF indicator; the other did not</td><td align="left" valign="top">152</td></tr><tr><td align="left" valign="top">Both reviewers unanimously opted for &#x201C;new indicator&#x201D;</td><td align="left" valign="top">43</td></tr><tr><td align="left" valign="top">Both reviewers unanimously opted for &#x201C;not applicable&#x201D;</td><td align="left" valign="top">48</td></tr><tr><td align="left" valign="top">Other scenarios, eg, combinations of new, not applicable, and undecidable</td><td align="left" valign="top">74</td></tr></tbody></table><table-wrap-foot><fn id="table2fn1"><p><sup>a</sup>TMF: Technology and Methods Platform for Networked Medical Research.</p></fn></table-wrap-foot></table-wrap><p>Both reviewers discussed the 357 disagreements and reached a consensus. Findings that were not considered proposals for an indicator, an indicator instance, or a quality check were deleted. Findings with suggestions for mapping to 2 or 3 indicators were broken down into individual entries leading to a numerical growth. 
This left 74 evidence sources with a total of 624 findings: 99 findings proposing a new indicator, 428 findings with a successful mapping to a TMF indicator, and 97 findings rejected. The median number of findings per source was 5.5 (range 1-54). A total of 30% (187/624) of the findings came from 6 evidence sources [<xref ref-type="bibr" rid="ref30">30</xref>-<xref ref-type="bibr" rid="ref35">35</xref>]. Findings qualifying for a new indicator were combined based on the description in the respective evidence sources. Each new indicator was assigned to one of the 14 dimensions in the categories data and metadata quality. A Microsoft Access database was used for the literature review, including the management of the retrieved records, the reviews, and the analysis of agreement.</p></sec><sec id="s2-2-5"><title>IDEFIM Contributions</title><p>Proposals for new indicators beyond the literature review were permitted to maximize the use of community knowledge, even if the proposals were published more recently than the literature search. These proposals were identified through snowball sampling from evidence sources, a manual search of other publications proposing quality indicators, input from scientific conferences, and contributions from invited experts. According to the project work plan, the relationship between data quality and the concepts of diversity, equity, and inclusion (DEI) was examined based on the initial records from the literature search [<xref ref-type="bibr" rid="ref36">36</xref>]. Related indicators were also established and added to the indicator set.</p></sec></sec><sec id="s2-3"><title>Condensation of the Draft Indicator Set</title><p>Only indicators assigned to a dimension in categories data and metadata quality were accepted. According to the IDEFIM project protocol, TMF indicators without evidence from at least 1 independent source in the literature review were excluded from the final indicator set. 
Furthermore, the draft set was screened for possible merges. Beyond being traced to their originating pillar&#x2014;TMF work, literature review, or IDEFIM contributions&#x2014;the indicator proposals were not formally or empirically validated.</p></sec><sec id="s2-4"><title>Ethical Considerations</title><p>This research did not involve human participants.</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><p>The draft set included 81 indicators. A total of 6 initially kept TMF indicators were excluded from the draft set because they were not referenced by an evidence source. Additionally, 4 TMF indicators were defined as subconcepts of 2 other indicators using information expected on the level of indicator instances. These 4 indicators were also excluded (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). A total of 2 indicators, 1 TMF indicator and 1 indicator from the review, were skipped because they refer to a dimension in the category context quality. <xref ref-type="table" rid="table3">Table 3</xref> shows the distribution of the final set of 69 indicators. Of the 41 TMF indicators, 30 remained after condensation, all dealing with data quality. A total of 31 new indicators were added based on findings from the literature review. These 31 new indicators were distributed fairly evenly between both categories, data and metadata quality. While the total number of indicators was lower in category metadata quality, they were distributed fairly evenly across its dimensions. In contrast, the dimensions of the category data quality were unevenly covered by indicators. Accuracy (data), completeness (data), and consistency (data) were the most dominant here. 
The 8 indicators added by IDEFIM did not significantly change the distribution.</p><table-wrap id="t3" position="float"><label>Table 3.</label><caption><p>Distribution of the 69 indicators among the 14 dimensions in the data and metadata quality categories of the IDEFIM indicator set.</p></caption><table id="table3" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Category and dimension</td><td align="left" valign="bottom" colspan="4">Origin</td></tr><tr><td align="left" valign="bottom"/><td align="left" valign="bottom">Total</td><td align="left" valign="bottom">TMF<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup></td><td align="left" valign="bottom">Literature review</td><td align="left" valign="bottom">IDEFIM contribution</td></tr></thead><tbody><tr><td align="left" valign="top">Data quality</td><td align="left" valign="top">53</td><td align="left" valign="top">30</td><td align="left" valign="top">16</td><td align="left" valign="top">7</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Accuracy (data)</td><td align="left" valign="top">12</td><td align="left" valign="top">6</td><td align="left" valign="top">4</td><td align="left" valign="top">2</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Completeness (cases)</td><td align="left" valign="top">3</td><td align="left" valign="top">2</td><td align="left" valign="top">&#x2014;<sup><xref ref-type="table-fn" rid="table3fn2">b</xref></sup></td><td align="left" valign="top">1</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Completeness (data)</td><td align="left" valign="top">12</td><td align="left" valign="top">7</td><td align="left" valign="top">3</td><td align="left" valign="top">2</td></tr><tr><td align="left" valign="top"><named-content 
content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Compliance (data)</td><td align="left" valign="top">3</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">3</td><td align="left" valign="top">&#x2014;</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Consistency (data)</td><td align="left" valign="top">19</td><td align="left" valign="top">13</td><td align="left" valign="top">5</td><td align="left" valign="top">1</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Credibility</td><td align="left" valign="top">1</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">1</td><td align="left" valign="top">&#x2014;</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Currentness</td><td align="left" valign="top">1</td><td align="left" valign="top">1</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Representativeness</td><td align="left" valign="top">2</td><td align="left" valign="top">1</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">1</td></tr><tr><td align="left" valign="top">Metadata quality</td><td align="left" valign="top">16</td><td align="left" valign="top">0</td><td align="left" valign="top">15</td><td align="left" valign="top">1</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Accuracy (metadata)</td><td align="left" valign="top">2</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">2</td><td align="left" valign="top">&#x2014;</td></tr><tr><td align="left" 
valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Completeness (metadata)</td><td align="left" valign="top">4</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">4</td><td align="left" valign="top">&#x2014;</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Compliance (metadata)</td><td align="left" valign="top">2</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">2</td><td align="left" valign="top">&#x2014;</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Consistency (metadata)</td><td align="left" valign="top">4</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">3</td><td align="left" valign="top">1</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Precision</td><td align="left" valign="top">2</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">2</td><td align="left" valign="top">&#x2014;</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Understandability</td><td align="left" valign="top">2</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">2</td><td align="left" valign="top">&#x2014;</td></tr><tr><td align="left" valign="top">Total</td><td align="left" valign="top">69</td><td align="left" valign="top">30</td><td align="left" valign="top">31</td><td align="left" valign="top">8</td></tr></tbody></table><table-wrap-foot><fn id="table3fn1"><p><sup>a</sup>TMF: Technology and Methods Platform for Networked Medical Research.</p></fn><fn id="table3fn2"><p><sup>b</sup>Not available.</p></fn></table-wrap-foot></table-wrap><p><xref ref-type="table" rid="table4">Table 4</xref> lists 53 
indicators related to data quality organized into 16 indicator groups for clarity, with 1 to 4 groups per dimension. The 16 indicators contributed by the literature review originated from 26 evidence sources. IDEFIM added 3 indicators based on a separate analysis of DEI issues [<xref ref-type="bibr" rid="ref36">36</xref>]. Additionally, James et al [<xref ref-type="bibr" rid="ref37">37</xref>] proposed the R-index as a standardized representativeness metric for benchmarking DEI in a dataset. The R-index is listed in the IDEFIM indicator set as &#x201C;Conspicuous representativeness distribution.&#x201D; To complete the consideration of the contingency table [<xref ref-type="bibr" rid="ref38">38</xref>,<xref ref-type="bibr" rid="ref39">39</xref>], the F-Measure was defined as the harmonic mean of correctness and recall based on a recommendation from the second workshop and named &#x201C;Validity.&#x201D; Time series (eg, of laboratory values) are a special phenomenon in health-related datasets. The idea of Giesa et al [<xref ref-type="bibr" rid="ref40">40</xref>] to address this phenomenon with an indicator that provides a rate of incomplete time series was accepted. 
While homonyms of observational units were considered in the TMF guideline, homonyms of pieces of information were missing and were therefore supplemented according to Woodall et al [<xref ref-type="bibr" rid="ref41">41</xref>].</p><table-wrap id="t4" position="float"><label>Table 4.</label><caption><p>Indicators related to data quality with origin and number of supporting evidence sources (ES).</p></caption><table id="table4" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom" colspan="2">Dimension, indicator group, and indicator</td><td align="left" valign="bottom">Origin</td><td align="left" valign="bottom">Number of ES<sup><xref ref-type="table-fn" rid="table4fn1">a</xref></sup></td></tr></thead><tbody><tr><td align="left" valign="top" colspan="4">Accuracy (data)</td></tr><tr><td align="left" valign="top" colspan="4"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Contingency table indicators</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Conspicuous correctness distribution</td><td align="left" valign="top">IDEFIM<sup><xref ref-type="table-fn" rid="table4fn2">b</xref></sup> (DEI<sup><xref ref-type="table-fn" rid="table4fn3">c</xref></sup>)</td><td align="char" char="." valign="top">&#x2014;<sup><xref ref-type="table-fn" rid="table4fn4">d</xref></sup></td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Correctness</td><td align="left" valign="top">TMF<sup><xref ref-type="table-fn" rid="table4fn5">e</xref></sup></td><td align="char" char="." 
valign="top">18</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Recall</td><td align="left" valign="top">TMF</td><td align="char" char="." valign="top">6</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Validity</td><td align="left" valign="top">IDEFIM</td><td align="left" valign="top">&#x2014;</td></tr><tr><td align="left" valign="top" colspan="4"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Disagreement with source data indicators</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Disagreement with source data referring to data elements</td><td align="left" valign="top">TMF</td><td align="char" char="." valign="top">21</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Disagreement with source data referring to observational units</td><td align="left" valign="top">TMF</td><td align="char" char="." 
valign="top">1</td></tr><tr><td align="left" valign="top" colspan="4"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Illegal content indicators</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Illegal values of qualitative data elements</td><td align="left" valign="top">TMF</td><td align="char" char="." valign="top">11</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Illegal values of qualitative data elements used for the coding of missings</td><td align="left" valign="top">TMF</td><td align="char" char="." valign="top">4</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Incorrect text in qualitative data elements</td><td align="left" valign="top">Review</td><td align="char" char="." valign="top">4</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Misfielded values</td><td align="left" valign="top">Review</td><td align="char" char="." 
valign="top">3</td></tr><tr><td align="left" valign="top" colspan="4"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Other indicators for accuracy (data)</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Granularity (data)</td><td align="left" valign="top">Review</td><td align="char" char="." valign="top">2</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Temporal trends in counts or proportions</td><td align="left" valign="top">Review</td><td align="char" char="." valign="top">1</td></tr><tr><td align="left" valign="top" colspan="4">Completeness (cases)</td></tr><tr><td align="left" valign="top" colspan="4"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Indicators for completeness (cases)</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Conspicuous recruitment rate distribution</td><td align="left" valign="top">IDEFIM (DEI)</td><td align="left" valign="top">&#x2014;</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Drop-out-rate</td><td align="left" valign="top">TMF</td><td align="char" char="." 
valign="top">6</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Recruitment rate</td><td align="left" valign="top">TMF</td><td align="char" char="." valign="top">15</td></tr><tr><td align="left" valign="top" colspan="4">Completeness (data)</td></tr><tr><td align="left" valign="top" colspan="4"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Missing content indicators</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Conspicuous missing values distribution</td><td align="left" valign="top">IDEFIM (DEI)<sup><xref ref-type="table-fn" rid="table4fn3">c</xref></sup></td><td align="left" valign="top">&#x2014;</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Missing modules</td><td align="left" valign="top">TMF</td><td align="char" char="." valign="top">5</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Missing values in data elements</td><td align="left" valign="top">TMF</td><td align="char" char="." 
valign="top">48</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Temporal missingness</td><td align="left" valign="top">IDEFIM [<xref ref-type="bibr" rid="ref40">40</xref>]</td><td align="left" valign="top">&#x2014;</td></tr><tr><td align="left" valign="top" colspan="4"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Other indicators for completeness (data)</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Data elements with existing entries for all observational units</td><td align="left" valign="top">TMF</td><td align="char" char="." valign="top">2</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Information density score</td><td align="left" valign="top">Review</td><td align="char" char="." valign="top">1</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Modules with existing entries for all data elements</td><td align="left" valign="top">Review</td><td align="char" char="." 
valign="top">1</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Observational units with existing entries for all data elements</td><td align="left" valign="top">Review</td><td align="char" char="." valign="top">4</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Observational units with follow-up</td><td align="left" valign="top">TMF</td><td align="char" char="." valign="top">1</td></tr><tr><td align="left" valign="top" colspan="4"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Refusal rate indicators</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Refusal rate of investigations</td><td align="left" valign="top">TMF</td><td align="char" char="." valign="top">1</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Refusal rate of modules</td><td align="left" valign="top">TMF</td><td align="char" char="." valign="top">1</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Refusal rate of single data elements</td><td align="left" valign="top">TMF</td><td align="char" char="." 
valign="top">1</td></tr><tr><td align="left" valign="top" colspan="4">Compliance (data)</td></tr><tr><td align="left" valign="top" colspan="4"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Indicators for compliance (data)<named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content></td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Data format, data type, and unit compliance</td><td align="left" valign="top">Review</td><td align="char" char="." valign="top">13</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Incompliance with metadata</td><td align="left" valign="top">Review</td><td align="char" char="." valign="top">1</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Range compliance</td><td align="left" valign="top">Review</td><td align="char" char="." 
valign="top">8</td></tr><tr><td align="left" valign="top" colspan="4">Consistency (data)</td></tr><tr><td align="left" valign="top" colspan="4"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Confusion and redundancy indicators<named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content></td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Confusion</td><td align="left" valign="top">IDEFIM [<xref ref-type="bibr" rid="ref41">41</xref>]</td><td align="left" valign="top">&#x2014;</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Duplicates (data)</td><td align="left" valign="top">TMF</td><td align="char" char="." valign="top">13</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Homonyms (data)</td><td align="left" valign="top">TMF</td><td align="char" char="." valign="top">2</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Synonyms (data)</td><td align="left" valign="top">TMF</td><td align="char" char="." 
valign="top">8</td></tr><tr><td align="left" valign="top" colspan="4"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Contradiction indicators<named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content></td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Contradictions</td><td align="left" valign="top">TMF</td><td align="char" char="." valign="top">31</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Data element contradictions</td><td align="left" valign="top">Review</td><td align="char" char="." valign="top">2</td></tr><tr><td align="left" valign="top" colspan="4"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Other indicators for consistency (data)<named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content></td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Missing evidence of known correlations</td><td align="left" valign="top">TMF</td><td align="char" char="." 
valign="top">4</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Single data source per observational unit</td><td align="left" valign="top">TMF</td><td align="char" char="." valign="top">2</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Temporality of categorical data elements</td><td align="left" valign="top">Review</td><td align="char" char="." valign="top">1</td></tr><tr><td align="left" valign="top" colspan="4"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Unexpected entry indicators<named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content></td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Concordance</td><td align="left" valign="top">TMF</td><td align="char" char="." valign="top">14</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Conspicuous distribution of digits in date-time data elements</td><td align="left" valign="top">Review</td><td align="char" char="." 
valign="top">1</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Conspicuous distribution of values</td><td align="left" valign="top">TMF</td><td align="char" char="." valign="top">6</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Data elements with value unknown etc</td><td align="left" valign="top">TMF</td><td align="char" char="." valign="top">3</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Disagreement with previous values</td><td align="left" valign="top">TMF</td><td align="char" char="." valign="top">3</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Frequency outliers</td><td align="left" valign="top">Review</td><td align="char" char="." valign="top">1</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Last digit preferences</td><td align="left" valign="top">TMF</td><td align="char" char="." 
valign="top">2</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Outliers (continuous data elements)</td><td align="left" valign="top">TMF</td><td align="char" char="." valign="top">17</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Outliers in numerical data elements in a multivariate analysis</td><td align="left" valign="top">Review</td><td align="char" char="." valign="top">1</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Values from external references</td><td align="left" valign="top">TMF</td><td align="char" char="." valign="top">3</td></tr><tr><td align="left" valign="top" colspan="4">Credibility</td></tr><tr><td align="left" valign="top" colspan="4"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Indicators for credibility</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Data element credibility</td><td align="left" valign="top">Review</td><td align="char" char="." 
valign="top">2</td></tr><tr><td align="left" valign="top" colspan="4">Currentness</td></tr><tr><td align="left" valign="top" colspan="4"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Indicators for currentness</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Currentness</td><td align="left" valign="top">TMF</td><td align="char" char="." valign="top">26</td></tr><tr><td align="left" valign="top" colspan="4">Representativeness</td></tr><tr><td align="left" valign="top" colspan="4"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Indicators for representativeness</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Conspicuous representativeness distribution</td><td align="left" valign="top">IDEFIM [<xref ref-type="bibr" rid="ref37">37</xref>]</td><td align="left" valign="top">&#x2014;</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Representativeness</td><td align="left" valign="top">TMF</td><td align="char" char="." 
valign="top">7</td></tr></tbody></table><table-wrap-foot><fn id="table4fn1"><p><sup>a</sup>ES: evidence source.</p></fn><fn id="table4fn2"><p><sup>b</sup>IDEFIM: contributed by the project, not the literature review.</p></fn><fn id="table4fn3"><p><sup>c</sup>DEI: diversity, equity, and inclusion.</p></fn><fn id="table4fn4"><p><sup>d</sup>Not available.</p></fn><fn id="table4fn5"><p><sup>e</sup>TMF: Technology and Methods Platform for Networked Medical Research.</p></fn></table-wrap-foot></table-wrap><p>The number of supporting evidence sources derived from the literature review can be used to weight indicators against each other. A total of 3 indicators were supported by more than one-third of the 74 evidence sources: &#x201C;Missing values in data elements&#x201D; (48 evidence sources), &#x201C;Contradictions&#x201D; (31), and &#x201C;Currentness&#x201D; (26). Two additional indicators were supported by about a quarter of the evidence sources: &#x201C;Correctness&#x201D; (18) and &#x201C;Disagreement with source data referring to data elements&#x201D; (21). <xref ref-type="supplementary-material" rid="app4">Multimedia Appendix 4</xref> gives a list of all evidence sources for each indicator in the category data quality.</p><p><xref ref-type="table" rid="table5">Table 5</xref> lists the 16 indicators related to metadata quality. Because there are fewer indicators, there is only 1 indicator group per dimension. The 15 indicators from the literature review came from 10 evidence sources. IDEFIM added 1 indicator: &#x201C;Duplicates&#x201D; refers to a comparison of metadata between multiple datasets that intentionally do not overlap [<xref ref-type="bibr" rid="ref41">41</xref>]. This indicator counts redundantly defined data elements. In the category metadata quality, the number of supporting evidence sources was less suitable as an indicator weight. 
Only 2 indicators had more than 1 supporting evidence source, &#x201C;Metadata format, type, and unit compliance&#x201D; (3 evidence sources) and &#x201C;Granularity (metadata)&#x201D; (2). <xref ref-type="supplementary-material" rid="app5">Multimedia Appendix 5</xref> provides a list of all evidence sources for each indicator in the category metadata quality.</p><table-wrap id="t5" position="float"><label>Table 5.</label><caption><p>Indicators related to metadata quality with origin and number of supporting evidence sources (ES).</p></caption><table id="table5" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Dimension, indicator group, and indicator</td><td align="left" valign="bottom">Origin<sup><xref ref-type="table-fn" rid="table5fn1">a</xref></sup></td><td align="left" valign="bottom">Number of ES<sup><xref ref-type="table-fn" rid="table5fn2">b</xref></sup></td></tr></thead><tbody><tr><td align="left" valign="top" colspan="3">Accuracy (metadata)</td></tr><tr><td align="left" valign="top" colspan="3"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Indicators for accuracy (metadata)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Correctness (metadata)</td><td align="left" valign="top">Review</td><td align="left" valign="top">1</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Responsiveness</td><td align="left" valign="top">Review</td><td align="left" valign="top">1</td></tr><tr><td align="left" valign="top" colspan="3">Completeness (metadata)</td></tr><tr><td align="left" valign="top" colspan="3"><named-content 
content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Indicators for completeness (metadata)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Completeness of administrative metadata</td><td align="left" valign="top">Review</td><td align="left" valign="top">1</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Coverage of all data elements</td><td align="left" valign="top">Review</td><td align="left" valign="top">1</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Data element completeness</td><td align="left" valign="top">Review</td><td align="left" valign="top">1</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Richness (metadata)</td><td align="left" valign="top">Review</td><td align="left" valign="top">1</td></tr><tr><td align="left" valign="top" colspan="3">Compliance (metadata)</td></tr><tr><td align="left" valign="top" colspan="3"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Indicators for compliance (metadata)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Data element compliance with reference</td><td align="left" valign="top">Review</td><td align="left" valign="top">1</td></tr><tr><td align="left" 
valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Metadata format, type, and unit compliance</td><td align="left" valign="top">Review</td><td align="left" valign="top">3</td></tr><tr><td align="left" valign="top" colspan="3">Consistency (metadata)</td></tr><tr><td align="left" valign="top" colspan="3"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Indicators for consistency (metadata)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Duplicates (metadata)</td><td align="left" valign="top">IDEFIM [<xref ref-type="bibr" rid="ref41">41</xref>]</td><td align="left" valign="top">&#x2014;<sup><xref ref-type="table-fn" rid="table5fn3">c</xref></sup></td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Heterogeneous representation of data elements</td><td align="left" valign="top">Review</td><td align="left" valign="top">1</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Homonyms (metadata)</td><td align="left" valign="top">Review</td><td align="left" valign="top">1</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Synonyms (metadata)</td><td align="left" valign="top">Review</td><td align="left" valign="top">1</td></tr><tr><td align="left" valign="top" 
colspan="3">Precision</td></tr><tr><td align="left" valign="top" colspan="3"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Indicators for precision</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Granularity (metadata)</td><td align="left" valign="top">Review</td><td align="left" valign="top">2</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Residual classes of qualitative data elements</td><td align="left" valign="top">Review</td><td align="left" valign="top">1</td></tr><tr><td align="left" valign="top" colspan="3">Understandability</td></tr><tr><td align="left" valign="top" colspan="3"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Indicators for understandability</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Ease of understanding</td><td align="left" valign="top">Review</td><td align="left" valign="top">1</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Relevance of the dataset&#x2019;s descriptive information</td><td align="left" valign="top">Review</td><td align="left" valign="top">1</td></tr></tbody></table><table-wrap-foot><fn id="table5fn1"><p><sup>a</sup>IDEFIM: contributed by the project, not the literature review.</p></fn><fn id="table5fn2"><p><sup>b</sup>ES: evidence source.</p></fn><fn id="table5fn3"><p><sup>c</sup>Not 
available.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Principal Findings</title><p>With 69 indicators, the IDEFIM indicator set expanded its predecessor, the TMF guideline [<xref ref-type="bibr" rid="ref22">22</xref>], by one-third. The 2 most comprehensive indicator lists found in the literature review contributed 54 [<xref ref-type="bibr" rid="ref33">33</xref>] and 35 findings [<xref ref-type="bibr" rid="ref34">34</xref>]. ISO/IEC 25024 offers 63 quality measures [<xref ref-type="bibr" rid="ref10">10</xref>]. Considering IDEFIM&#x2019;s coverage of data as well as metadata quality, 69 indicators seem manageable compared to potential competitors. However, the IDEFIM set does not cover the categories context and openness quality. <xref ref-type="supplementary-material" rid="app6">Multimedia Appendix 6</xref> contains the specifications of each indicator.</p><p>As expected, not all TMF indicators were confirmed. However, the final set impressively demonstrated the relevance of the adopted indicators. The 8 most frequently mentioned indicators originated from the TMF guideline. Conversely, 21 out of 31 indicators added by the review were mentioned only once. In the category metadata quality, the number of indicators was lower and the supporting evidence was weaker than in the category data quality. This does not necessarily mean that metadata quality was out of scope. Metadata quality might be appropriately evaluated with a mixture of measures (as is the case in the category data quality) and conditions (as might be the case in the categories context and openness quality). 
Nevertheless, indicators of metadata quality could be a critical area for future work to effectively publish open data from empirical health research [<xref ref-type="bibr" rid="ref42">42</xref>].</p><p>The IDEFIM indicator set and its framework provide a foundation for measuring the quality of datasets in empirical health research. Further work could lead to a community approach of sharing and harmonizing instances and applications of its indicators. This includes recommendations for using the indicators, indicator instances, and quality checks in specific use cases. The cross-registry benchmarking of data in health services research can serve as an initial example here [<xref ref-type="bibr" rid="ref43">43</xref>]. The quality indicators were taken from the TMF guideline. Each participating registry then tailored the indicators to its own setting by specifying data elements considered with regard to missing values, determining its particular denominator with regard to the recruitment rate, and defining the rules used to count contradictions, for example. This tailoring resulted in quality indicator instances for each registry. However, the quality indicators&#x2019; common ground with identical meanings and thresholds allowed for a comparison of results between registries. Due to the registries&#x2019; different information models, data structures, and concept systems, each registry was responsible for specifying and implementing quality checks.</p></sec><sec id="s4-2"><title>Comparison With Prior Work</title><p>Like others, IDEFIM understood a quality indicator as a distinct entity, broken down into components in its specification. Woodall et al [<xref ref-type="bibr" rid="ref41">41</xref>] took a different approach. They distinguish between 9 data quality problems, 9 generic data quality methods, and 6 so-called taxonomy elements. Taxonomy elements combine different objects of a data structure from the perspective of a relational database management system. 
Quality measures arise from the cross-tabulation of data quality problems and taxonomy elements, which involves assigning none, 1, or several generic data quality methods to each intersection point. Of the initial 54 intersection points, 16 were left blank as senseless and 6 were labeled as gaps that could be filled in subsequent work. The remaining 32 intersection points were filled with 57 entries. To address all aspects proposed by Woodall et al [<xref ref-type="bibr" rid="ref41">41</xref>], IDEFIM added 2 indicators. &#x201C;Duplicates (metadata)&#x201D; represented the cross-domain analyses mentioned by Woodall et al [<xref ref-type="bibr" rid="ref41">41</xref>] at the level of data elements intended to reduce redundancy. Second, IDEFIM supplemented the data quality problem &#x201C;Existence of synonyms and homonyms&#x201D; with the indicator &#x201C;Confusion,&#x201D; which examines homonyms of values in a dataset. The approach of Woodall et al [<xref ref-type="bibr" rid="ref41">41</xref>] turned out to be a good training partner for IDEFIM; however, constructing indicators by combining different axes appeared too theoretical for practical use.</p><p>QUANTUM proposes a data quality and utility label [<xref ref-type="bibr" rid="ref44">44</xref>] that might be used in the EHDS according to article 78 of the respective regulation [<xref ref-type="bibr" rid="ref14">14</xref>]. In this regulation, a &#x201C;data quality and utility label means a graphic diagram, including a scale, describing the data quality and conditions of use of a dataset.&#x201D; This concept is closely related to measuring dataset quality with the IDEFIM framework. However, there is a fundamental difference between QUANTUM and IDEFIM. QUANTUM examines the documentation of quality-related aspects, while IDEFIM examines quality-related aspects directly. In combination, IDEFIM might constitute the basis from which QUANTUM derives its rating. 
<xref ref-type="table" rid="table6">Table 6</xref> shows a comparison between QUANTUM and IDEFIM. Most of QUANTUM&#x2019;s measure labels represent Boolean conditions that do not provide information about data or metadata quality, for example, the &#x201C;availability of a data access &#x0026; usage policy at the time of release of the dataset.&#x201D; Some measure labels represent measurable quantities such as the coverage rate. For this comparison, we assigned IDEFIM indicators to a measure label even if the label represents a Boolean condition. In these cases, the indicator can quantify the fulfillment of a condition or evaluate the fulfillment of a condition based on a predefined threshold.</p><table-wrap id="t6" position="float"><label>Table 6.</label><caption><p>Comparison between the measures of QUANTUM [<xref ref-type="bibr" rid="ref44">44</xref>] and IDEFIM.</p></caption><table id="table6" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom" colspan="2">QUANTUM</td><td align="left" valign="bottom">IDEFIM</td></tr><tr><td align="left" valign="bottom">Dimension</td><td align="left" valign="bottom">Measure label</td><td align="left" valign="bottom">Reference</td></tr></thead><tbody><tr><td align="left" valign="top">Accessibility</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Availability of a data access &#x0026; usage policy at the time of release of the dataset</p></list-item><list-item><p>Average time from data access application to data release for a specific dataset</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Category: openness quality</p></list-item><list-item><p>Dimension: availability</p></list-item><list-item><p>Category: openness quality</p></list-item><list-item><p>Dimension: accessibility</p></list-item></list></td></tr><tr><td align="left" valign="top">Population coverage</td><td align="left" valign="top"><list 
list-type="bullet"><list-item><p>Coverage Rate (percentage of the eligible population represented in the dataset)</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Category: data quality</p></list-item><list-item><p>Dimension: completeness (cases)</p></list-item><list-item><p>Quality indicator: &#x201C;Recruitment rate&#x201D;</p></list-item></list></td></tr><tr><td align="left" valign="top">Population representativity</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>How closely does the observed population represent the expected population?</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Category: data quality</p></list-item><list-item><p>Dimension: representativeness</p></list-item><list-item><p>Quality indicator: &#x201C;Representativeness&#x201D;</p></list-item></list></td></tr><tr><td align="left" valign="top">Compliance</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Is there documentation of compliance with ethical standards, conventions, protocols or regulations?</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Category: context quality</p></list-item><list-item><p>Dimension: compliance (context)</p></list-item></list></td></tr><tr><td align="left" valign="top">Data provenance</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Is the source of the dataset documented?</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Category: context quality</p></list-item><list-item><p>Dimension: provenance</p></list-item></list></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top"><list list-type="bullet"><list-item><p>Are the processes and operations on the data documented?</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>[not 
addressed]</p></list-item></list></td></tr><tr><td align="left" valign="top">Metadata scope</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Existence of comprehensive standardised metadata</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Category: metadata quality</p></list-item><list-item><p>Dimension: completeness (metadata)</p></list-item><list-item><p>Quality indicator: &#x201C;Richness (metadata)&#x201D;</p></list-item></list></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top"><list list-type="bullet"><list-item><p>Existence of an exhaustive data dictionary at variable level</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Category: metadata quality</p></list-item><list-item><p>Dimension: completeness (metadata)</p></list-item><list-item><p>Quality indicator: &#x201C;Coverage of all data elements&#x201D;</p></list-item></list></td></tr><tr><td align="left" valign="top">Accuracy</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Is the accuracy of the dataset documented?</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Category: data quality</p></list-item><list-item><p>Dimension: accuracy (data)</p></list-item><list-item><p>Quality indicator: [several]</p></list-item></list></td></tr><tr><td align="left" valign="top">Coherence</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Is coherence of the dataset documented?</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Category: data quality</p></list-item><list-item><p>Dimension: accuracy (data)</p></list-item><list-item><p>Group of indicators: illegal content</p></list-item><list-item><p>Quality indicator: [several]</p></list-item></list></td></tr><tr><td align="left" valign="top">Completeness</td><td align="left" valign="top"><list 
list-type="bullet"><list-item><p>Is completeness of the dataset documented?</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Category: data quality</p></list-item><list-item><p>Dimension: completeness (data)</p></list-item><list-item><p>Quality indicator: [several]</p></list-item></list></td></tr><tr><td align="left" valign="top">Consistency</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Is the consistency of the dataset documented?</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Category: data quality</p></list-item><list-item><p>Dimension: consistency (data)</p></list-item><list-item><p>Quality indicator: [several]</p></list-item></list></td></tr><tr><td align="left" valign="top">Precision</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Is the precision of the dataset documented?</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Category: metadata quality</p></list-item><list-item><p>Dimension: precision</p></list-item><list-item><p>Quality indicator: &#x201C;Granularity (metadata)&#x201D;</p></list-item></list></td></tr><tr><td align="left" valign="top">Validity</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Availability of a conformance report for the data model</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Category: data quality</p></list-item><list-item><p>Dimension: compliance (data)</p></list-item><list-item><p>Quality indicator: [several]</p></list-item></list></td></tr></tbody></table></table-wrap><p>From the comparison, one can conclude that IDEFIM addressed all measures of QUANTUM with quality indicators, as far as the categories data quality and metadata quality are concerned. 
Therefore, using the IDEFIM indicator set will simplify the registration for the EHDS data quality and utility label to this extent. However, the terminology of QUANTUM differs and does not refer to ISO standards, terms, or definitions. This can be attributed to the unusual framework and wording of the EHDS regulation.</p></sec><sec id="s4-3"><title>Limitations</title><p>Some limitations of the work must be noted. First, the variety of terminologies used to define data quality and its dimensions posed a challenge. For instance, we found the terms aspects, attributes, categories, characteristics, domains, features, indicators, metrics, processes, and submetrics to be synonyms or siblings of the term &#x201C;dimension.&#x201D; The same was true for the term &#x201C;indicator&#x201D; noted, for example, as measure, check, or even dimension. In cases of doubt, the criteria were interpreted generously, which may have led to the inclusion of misleading sources. Second, maintaining the focus on empirical health research was difficult. This dilemma demonstrates the importance of publishing terms in a citable manner to avoid ambiguity in interpretation and to encourage reliable reuse. Substantial methodological contributions to data quality originated from other fields, even outside of health care. We did not want to overlook important proposals for quality indicators, so we accepted some sources from outside the domain of empirical health research. The domain addressed in the literature was sometimes unclear or overlapped with empirical health research. Particularly, the secondary use of data raises awareness about the initial quality of health-related data, that is, the quality of data from daily health care. Sources were also accepted outside the intended domain if their approach appeared relevant. Third, IDEFIM does not reflect solely the result of its literature review. 
Although IDEFIM considered the PRISMA extension for Scoping Reviews [<xref ref-type="bibr" rid="ref45">45</xref>], it uses the results of the literature review as one but not the only basis for its indicator set. However, it would not have been possible to develop a relevant indicator set from the literature review alone due to the terminological and conceptual confusion. Therefore, the IDEFIM indicator set offers a unique perspective. The bottom-up approach that gives evidence to each indicator is a strength and a unique selling point. Fourth, it is important to note that formal consensus mechanisms were not applied to all parts of this work. For instance, the literature review was one but not the only pillar that led to the inclusion of indicator proposals in the IDEFIM indicator set. Ultimately, the IDEFIM indicator set reflects the decisions of the core project team. Furthermore, a formal and empirical validation of individual indicators was outside the scope of the presented work. Whenever possible and appropriate, this validation is a project of its own for each indicator.</p></sec><sec id="s4-4"><title>Conclusions</title><p>IDEFIM offers the most up-to-date set of indicators intended to measure the quality of a dataset in empirical health research, as far as the authors know. This set can be used to manage and control the data collection in a clinical trial, a cohort study, or a patient registry, for example. Furthermore, datasets can be labeled according to their fitness for purpose in principle. Then, health data users can verify electronic health data or claims data with respect to the intended use. All indicators of the IDEFIM set were defined using a uniform structure and integrated into the IDEFIM framework of categories and dimensions. However, the indicators cannot be applied automatically without prior adaptation. 
It is up to the users to adapt quality indicators to their specific needs, eg, data elements of interest, to create respective indicator instances, and to implement quality checks operating on the data representation layer. This work does not oppose tool-based approaches to data quality control, such as the DataQualityDashboard [<xref ref-type="bibr" rid="ref46">46</xref>] and dataquieR [<xref ref-type="bibr" rid="ref47">47</xref>]. Rather, it offers a conceptual basis for the appropriate application of related tools in empirical health research practice.</p></sec></sec></body><back><ack><p>The authors used DeepL Write in the refinement, correction, editing, or formatting of the manuscript during the review process to improve clarity of language.</p></ack><notes><sec><title>Funding</title><p>The project was funded by the Deutsche Forschungsgemeinschaft (DFG, German Research Foundation) - 506700202.</p></sec><sec><title>Data Availability</title><p>The individual indicator specifications in version 0.8 are available in <xref ref-type="supplementary-material" rid="app6">Multimedia Appendix 6</xref>.</p></sec></notes><fn-group><fn fn-type="con"><p>Conceptualization: JS</p><p>Data curation: SH</p><p>Formal analysis: SH</p><p>Funding acquisition: JS</p><p>Methodology: JS</p><p>Project administration: SH</p><p>Resources: SH, JS</p><p>Software: SH</p><p>Supervision: JS</p><p>Validation: OJB, CD, TH, EJ, ML, SM, CS</p><p>Writing &#x2013; original draft: SH, JS</p><p>Writing &#x2013; review and editing: SH, OJB, CD, TH, EJ, ML, SM, CS, JS</p></fn><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">CONSORT</term><def><p>Consolidated Standards of Reporting Trials</p></def></def-item><def-item><term id="abb2">DEI</term><def><p>diversity, equity, and inclusion</p></def></def-item><def-item><term id="abb3">DFG</term><def><p>Deutsche Forschungsgemeinschaft</p></def></def-item><def-item><term 
id="abb4">EHDS</term><def><p>European Health Data Space</p></def></def-item><def-item><term id="abb5">ES</term><def><p>evidence source</p></def></def-item><def-item><term id="abb6">IEC</term><def><p>International Electrotechnical Commission</p></def></def-item><def-item><term id="abb7">ISO</term><def><p>International Organization for Standardization</p></def></def-item><def-item><term id="abb8">PDCA</term><def><p>plan-do-check-act</p></def></def-item><def-item><term id="abb9">PRISMA</term><def><p>Preferred Reporting Items for Systematic Reviews and Meta-Analyses</p></def></def-item><def-item><term id="abb10">STROBE</term><def><p>Strengthening the Reporting of Observational Studies in Epidemiology</p></def></def-item><def-item><term id="abb11">TMF</term><def><p>Technology and Methods Platform for Networked Medical Research</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="report"><article-title>ICH harmonised guideline for good clinical practice E6(R3)</article-title><year>2025</year><month>01</month><day>6</day><access-date>2025-12-29</access-date><publisher-name>International Council for Harmonisation of Technical Requirements for Pharmaceuticals for Human Use</publisher-name><comment><ext-link ext-link-type="uri" xlink:href="https://database.ich.org/sites/default/files/ICH_E6(R3)_Step4_FinalGuideline_2025_0106.pdf">https://database.ich.org/sites/default/files/ICH_E6(R3)_Step4_FinalGuideline_2025_0106.pdf</ext-link></comment></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Gliklich</surname><given-names>RE</given-names> </name><name name-style="western"><surname>Leavy</surname><given-names>MB</given-names> </name><name name-style="western"><surname>Dreyer</surname><given-names>NA</given-names> </name></person-group><source>Registries for Evaluating Patient Outcomes: 
A User&#x2019;s Guide</source><year>2020</year><publisher-name>Agency for Healthcare Research and Quality</publisher-name><pub-id pub-id-type="doi">10.23970/AHRQEPCREGISTRIES4</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Stausberg</surname><given-names>J</given-names> </name><name name-style="western"><surname>Maier</surname><given-names>B</given-names> </name><name name-style="western"><surname>Bestehorn</surname><given-names>K</given-names> </name><etal/></person-group><article-title>Memorandum registry for health services research: update 2019</article-title><source>Gesundheitswesen</source><year>2020</year><month>03</month><volume>82</volume><issue>3</issue><fpage>e39</fpage><lpage>e66</lpage><pub-id pub-id-type="doi">10.1055/a-1083-6417</pub-id><pub-id pub-id-type="medline">32069507</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hopewell</surname><given-names>S</given-names> </name><name name-style="western"><surname>Chan</surname><given-names>AW</given-names> </name><name name-style="western"><surname>Collins</surname><given-names>GS</given-names> </name><etal/></person-group><article-title>CONSORT 2025 statement: updated guideline for reporting randomised trials</article-title><source>Lancet</source><year>2025</year><month>04</month><day>14</day><volume>405</volume><fpage>1633</fpage><lpage>1640</lpage><pub-id pub-id-type="doi">10.1016/S0140-6736(25)00672-5</pub-id><pub-id pub-id-type="medline">40245901</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Elm</surname><given-names>E von</given-names> </name><name 
name-style="western"><surname>Altman</surname><given-names>DG</given-names> </name><name name-style="western"><surname>Egger</surname><given-names>M</given-names> </name><name name-style="western"><surname>Pocock</surname><given-names>SJ</given-names> </name><name name-style="western"><surname>G&#x00F8;tzsche</surname><given-names>PC</given-names> </name><name name-style="western"><surname>Vandenbroucke</surname><given-names>JP</given-names> </name></person-group><article-title>Strengthening the reporting of observational studies in epidemiology (STROBE) statement: guidelines for reporting observational studies</article-title><source>BMJ</source><year>2007</year><month>10</month><day>20</day><volume>335</volume><issue>7624</issue><fpage>806</fpage><lpage>808</lpage><pub-id pub-id-type="doi">10.1136/bmj.39335.541782.AD</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Deming</surname><given-names>WE</given-names> </name></person-group><source>Out of The Crisis: Quality, Productivity and Competitive Position</source><year>1982</year><publisher-name>Cambridge University Press</publisher-name><pub-id pub-id-type="other">0911379010</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Crosby</surname><given-names>PB</given-names> </name></person-group><source>Quality Is Free: The Art of Making Quality Certain</source><year>1980</year><publisher-name>Penguin</publisher-name><pub-id pub-id-type="other">0-451-62585-4</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Juran</surname><given-names>JM</given-names> </name><name name-style="western"><surname>Godfrey</surname><given-names>AB</given-names> 
</name></person-group><source>Juran&#x2019;s Quality Handbook</source><year>1999</year><edition>5</edition><publisher-name>McGraw Hill</publisher-name><pub-id pub-id-type="other">0-07-034003-X</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="web"><article-title>ISO 8000-1:2022 data quality &#x2014; part 1: overview</article-title><source>International Organization for Standardization (ISO)</source><access-date>2026-06-03</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.iso.org/standard/81745.html">https://www.iso.org/standard/81745.html</ext-link></comment></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="web"><article-title>ISO/IEC 25024:2015(en), systems and software engineering &#x2014; systems and software quality requirements and evaluation (SQuaRE) &#x2014; measurement of data quality</article-title><source>International Organization for Standardization (ISO)</source><access-date>2026-06-03</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.iso.org/obp/ui/#iso:std:iso-iec:25024:ed-1:v1:en">https://www.iso.org/obp/ui/#iso:std:iso-iec:25024:ed-1:v1:en</ext-link></comment></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>An</surname><given-names>D</given-names> </name><name name-style="western"><surname>Lim</surname><given-names>M</given-names> </name><name name-style="western"><surname>Lee</surname><given-names>S</given-names> </name></person-group><article-title>Challenges for data quality in the clinical data life cycle: systematic review</article-title><source>J Med Internet Res</source><year>2025</year><month>04</month><day>23</day><volume>27</volume><fpage>e60709</fpage><pub-id pub-id-type="doi">10.2196/60709</pub-id><pub-id pub-id-type="medline">40266662</pub-id></nlm-citation></ref><ref 
id="ref12"><label>12</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Weiskopf</surname><given-names>NG</given-names> </name><name name-style="western"><surname>Weng</surname><given-names>C</given-names> </name></person-group><article-title>Methods and dimensions of electronic health record data quality assessment: enabling reuse for clinical research</article-title><source>J Am Med Inform Assoc</source><year>2013</year><month>01</month><day>1</day><volume>20</volume><issue>1</issue><fpage>144</fpage><lpage>151</lpage><pub-id pub-id-type="doi">10.1136/amiajnl-2011-000681</pub-id><pub-id pub-id-type="medline">22733976</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wilkinson</surname><given-names>MD</given-names> </name><name name-style="western"><surname>Dumontier</surname><given-names>M</given-names> </name><name name-style="western"><surname>Aalbersberg</surname><given-names>IJJ</given-names> </name><etal/></person-group><article-title>The FAIR Guiding Principles for scientific data management and stewardship</article-title><source>Sci Data</source><year>2016</year><month>03</month><day>15</day><volume>3</volume><issue>1</issue><fpage>160018</fpage><pub-id pub-id-type="doi">10.1038/sdata.2016.18</pub-id><pub-id pub-id-type="medline">26978244</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="web"><article-title>Regulation (EU) 2025/327 of the European parliament and of the council of 11 February 2025 on the European health data space and amending directive 2011/24/EU and regulation (EU) 2024/2847 (text with EEA relevance)</article-title><source>EUR-Lex</source><access-date>2025-12-29</access-date><comment><ext-link ext-link-type="uri" 
xlink:href="https://eur-lex.europa.eu/eli/reg/2025/327/oj/eng">https://eur-lex.europa.eu/eli/reg/2025/327/oj/eng</ext-link></comment></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Malin</surname><given-names>JL</given-names> </name><name name-style="western"><surname>Keating</surname><given-names>NL</given-names> </name></person-group><article-title>The cost-quality trade-off: need for data quality standards for studies that impact clinical practice and health policy</article-title><source>J Clin Oncol</source><year>2005</year><month>07</month><day>20</day><volume>23</volume><issue>21</issue><fpage>4581</fpage><lpage>4584</lpage><pub-id pub-id-type="doi">10.1200/JCO.2005.01.912</pub-id><pub-id pub-id-type="medline">15851767</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Siddique</surname><given-names>SM</given-names> </name><name name-style="western"><surname>Evans</surname><given-names>CV</given-names> </name><name name-style="western"><surname>Harhay</surname><given-names>M</given-names> </name><etal/></person-group><article-title>Critical appraisal for racial and ethnic equity in clinical prediction models extension: development of a critical appraisal tool extension to assess racial and ethnic equity-related risk of bias for clinical prediction models</article-title><source>Health Equity</source><year>2023</year><volume>7</volume><issue>1</issue><fpage>773</fpage><lpage>781</lpage><pub-id pub-id-type="doi">10.1089/heq.2023.0035</pub-id><pub-id pub-id-type="medline">38076212</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ramasamy</surname><given-names>A</given-names> </name><name 
name-style="western"><surname>Chowdhury</surname><given-names>S</given-names> </name></person-group><article-title>Big data quality dimensions: a systematic literature review</article-title><source>Journal of Information Systems and Technology Management</source><year>2020</year><volume>17</volume><fpage>e202017003</fpage><pub-id pub-id-type="doi">10.4301/S1807-1775202017003</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Assaf</surname><given-names>A</given-names> </name><name name-style="western"><surname>Senart</surname><given-names>A</given-names> </name><name name-style="western"><surname>Troncy</surname><given-names>R</given-names> </name></person-group><article-title>Towards an objective assessment framework for linked data quality: enriching dataset profiles with quality indicators</article-title><source>Int J Semant Web Inf Syst</source><year>2016</year><volume>12</volume><fpage>111</fpage><lpage>133</lpage><pub-id pub-id-type="doi">10.4018/IJSWIS.2016070104</pub-id></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Aspinall</surname><given-names>PJ</given-names> </name></person-group><article-title>Measuring the health patterns of the &#x2018;mixed/multiple&#x2019; ethnic group in Britain: data quality problems, reporting issues, and implications for policy</article-title><source>Int J Soc Res Methodol</source><year>2018</year><month>05</month><day>4</day><volume>21</volume><issue>3</issue><fpage>359</fpage><lpage>371</lpage><pub-id pub-id-type="doi">10.1080/13645579.2017.1399623</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="web"><article-title>ISO 8000-2:2022(en), data quality - part 2: vocabulary</article-title><source>International Organization for Standardization 
(ISO)</source><access-date>2026-06-03</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.iso.org/obp/ui/en/#iso:std:iso:8000:-2:ed-5:v1:en">https://www.iso.org/obp/ui/en/#iso:std:iso:8000:-2:ed-5:v1:en</ext-link></comment></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="report"><article-title>ISO/IEC 11179-33:2023(E) Information technology - metadata registries (MDR) - part 33: metamodel for data set registration</article-title><year>2023</year><access-date>2026-06-03</access-date><publisher-name>ISO/IEC</publisher-name><comment><ext-link ext-link-type="uri" xlink:href="https://cdn.standards.iteh.ai/samples/81725/ec9431074c324417a349c5fddbdf7ac9/ISO-IEC-11179-33-2023.pdf">https://cdn.standards.iteh.ai/samples/81725/ec9431074c324417a349c5fddbdf7ac9/ISO-IEC-11179-33-2023.pdf</ext-link></comment></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Stausberg</surname><given-names>J</given-names> </name><name name-style="western"><surname>Bauer</surname><given-names>U</given-names> </name><name name-style="western"><surname>Nasseh</surname><given-names>D</given-names> </name><etal/></person-group><article-title>Indicators of data quality: review and requirements from the perspective of networked medical research</article-title><source>GMS Med Inform Biom Epidemiol</source><year>2019</year><volume>15</volume><fpage>Doc05</fpage><pub-id pub-id-type="doi">10.3205/mibe000199</pub-id></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="report"><article-title>ISO/IEC 25012:2008(E) Software engineering - software product quality requirements and evaluation (SQuaRE) - data quality model</article-title><year>2008</year><access-date>2026-06-03</access-date><publisher-name>ISO/IEC</publisher-name><comment><ext-link ext-link-type="uri" 
xlink:href="https://cdn.standards.iteh.ai/samples/35736/3791c8ca8fd64a7fa4c7b629ec8f8524/ISO-IEC-25012-2008.pdf">https://cdn.standards.iteh.ai/samples/35736/3791c8ca8fd64a7fa4c7b629ec8f8524/ISO-IEC-25012-2008.pdf</ext-link></comment></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Stausberg</surname><given-names>J</given-names> </name><name name-style="western"><surname>Harkener</surname><given-names>S</given-names> </name><name name-style="western"><surname>B&#x00FC;nz</surname><given-names>S</given-names> </name></person-group><article-title>A collection of data quality indicators for health research: rationale for an update</article-title><source>Stud Health Technol Inform</source><year>2024</year><month>11</month><day>22</day><volume>321</volume><fpage>254</fpage><lpage>258</lpage><pub-id pub-id-type="doi">10.3233/SHTI241103</pub-id><pub-id pub-id-type="medline">39575819</pub-id></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Stausberg</surname><given-names>J</given-names> </name><name name-style="western"><surname>Harkener</surname><given-names>S</given-names> </name><name name-style="western"><surname>Draeger</surname><given-names>C</given-names> </name><etal/></person-group><article-title>Quality of data sets in empirical health research: the IDEFIM framework</article-title><source>Stud Health Technol Inform</source><year>2025</year><month>08</month><day>7</day><volume>329</volume><fpage>103</fpage><lpage>107</lpage><pub-id pub-id-type="doi">10.3233/SHTI250810</pub-id><pub-id pub-id-type="medline">40775828</pub-id></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Stausberg</surname><given-names>J</given-names> </name><name name-style="western"><surname>Harkener</surname><given-names>S</given-names> </name></person-group><article-title>How to define a data quality indicator?</article-title><source>Stud Health Technol Inform</source><year>2025</year><month>06</month><day>26</day><volume>328</volume><fpage>266</fpage><lpage>267</lpage><pub-id pub-id-type="doi">10.3233/SHTI250716</pub-id><pub-id pub-id-type="medline">40588923</pub-id></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Martin</surname><given-names>EG</given-names> </name><name name-style="western"><surname>Law</surname><given-names>J</given-names> </name><name name-style="western"><surname>Ran</surname><given-names>W</given-names> </name><name name-style="western"><surname>Helbig</surname><given-names>N</given-names> </name><name name-style="western"><surname>Birkhead</surname><given-names>GS</given-names> </name></person-group><article-title>Evaluating the quality and usability of open data for public health research: a systematic review of data offerings on 3 open data platforms</article-title><source>J Public Health Manag Pract</source><year>2017</year><volume>23</volume><issue>4</issue><fpage>e5</fpage><lpage>e13</lpage><pub-id pub-id-type="doi">10.1097/PHH.0000000000000388</pub-id><pub-id pub-id-type="medline">26910872</pub-id></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Nonnemacher</surname><given-names>M</given-names> </name><name name-style="western"><surname>Weiland</surname><given-names>D</given-names> </name><name name-style="western"><surname>Stausberg</surname><given-names>J</given-names> </name></person-group><source>Datenqualit&#x00E4;t in Der Medizinischen Forschung: Leitlinie Zum 
Adaptiven Management von Datenqualit&#x00E4;t in Kohortenstudien Und Registern</source><year>2007</year><publisher-name>Medizinisch Wissenschaftliche Verlagsgesellschaft</publisher-name><pub-id pub-id-type="other">978-3-939069-36-2</pub-id></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Nonnemacher</surname><given-names>M</given-names> </name><name name-style="western"><surname>Nasseh</surname><given-names>D</given-names> </name><name name-style="western"><surname>Stausberg</surname><given-names>J</given-names> </name></person-group><source>Datenqualit&#x00E4;t in Der Medizinischen Forschung: Leitlinie Zum Adaptiven Management von Datenqualit&#x00E4;t in Kohortenstudien Und Registern</source><year>2014</year><publisher-name>Medizinisch Wissenschaftliche Verlagsgesellschaft</publisher-name><comment><ext-link ext-link-type="uri" xlink:href="https://www.mwv-open.de/site/books/10.32745/9783954663743">https://www.mwv-open.de/site/books/10.32745/9783954663743</ext-link></comment><pub-id pub-id-type="doi">10.32745/9783954663743</pub-id><pub-id pub-id-type="other">978-3-95466-121-3</pub-id></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Blacketer</surname><given-names>C</given-names> </name><name name-style="western"><surname>Defalco</surname><given-names>FJ</given-names> </name><name name-style="western"><surname>Ryan</surname><given-names>PB</given-names> </name><name name-style="western"><surname>Rijnbeek</surname><given-names>PR</given-names> </name></person-group><article-title>Increasing trust in real-world evidence through evaluation of observational data quality</article-title><source>J Am Med Inform 
Assoc</source><year>2021</year><month>09</month><day>18</day><volume>28</volume><issue>10</issue><fpage>2251</fpage><lpage>2257</lpage><pub-id pub-id-type="doi">10.1093/jamia/ocab132</pub-id><pub-id pub-id-type="medline">34313749</pub-id></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Laberge</surname><given-names>M</given-names> </name><name name-style="western"><surname>Shachak</surname><given-names>A</given-names> </name></person-group><article-title>Developing a tool to assess the quality of socio-demographic data in community health centres</article-title><source>Appl Clin Inform</source><year>2013</year><volume>4</volume><issue>1</issue><fpage>1</fpage><lpage>11</lpage><pub-id pub-id-type="doi">10.4338/ACI-2012-10-CR-0041</pub-id><pub-id pub-id-type="medline">23650483</pub-id></nlm-citation></ref><ref id="ref32"><label>32</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Liu</surname><given-names>CH</given-names> </name><name name-style="western"><surname>Talaei-Khoei</surname><given-names>A</given-names> </name><name name-style="western"><surname>Storey</surname><given-names>VC</given-names> </name><name name-style="western"><surname>Peng</surname><given-names>GC</given-names> </name></person-group><article-title>A review of the state of the art of data quality in healthcare</article-title><source>Journal of Global Information Management</source><year>2023</year><month>01</month><volume>31</volume><issue>1</issue><fpage>1</fpage><lpage>18</lpage><pub-id pub-id-type="doi">10.4018/JGIM.316236</pub-id></nlm-citation></ref><ref id="ref33"><label>33</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Quindroit</surname><given-names>P</given-names> </name><name 
name-style="western"><surname>Fruchart</surname><given-names>M</given-names> </name><name name-style="western"><surname>Degoul</surname><given-names>S</given-names> </name><etal/></person-group><article-title>Definition of a practical taxonomy for referencing data quality problems in health care databases</article-title><source>Methods Inf Med</source><year>2023</year><month>05</month><volume>62</volume><issue>1-02</issue><fpage>19</fpage><lpage>30</lpage><pub-id pub-id-type="doi">10.1055/a-1976-2371</pub-id><pub-id pub-id-type="medline">36356592</pub-id></nlm-citation></ref><ref id="ref34"><label>34</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Schmidt</surname><given-names>CO</given-names> </name><name name-style="western"><surname>Struckmann</surname><given-names>S</given-names> </name><name name-style="western"><surname>Enzenbach</surname><given-names>C</given-names> </name><etal/></person-group><article-title>Facilitating harmonized data quality assessments. 
A data quality framework for observational health research data collections with software implementations in R</article-title><source>BMC Med Res Methodol</source><year>2021</year><month>04</month><day>2</day><volume>21</volume><issue>1</issue><fpage>63</fpage><pub-id pub-id-type="doi">10.1186/s12874-021-01252-7</pub-id><pub-id pub-id-type="medline">33810787</pub-id></nlm-citation></ref><ref id="ref35"><label>35</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wu</surname><given-names>D</given-names> </name><name name-style="western"><surname>Xu</surname><given-names>H</given-names> </name><name name-style="western"><surname>Yongyi</surname><given-names>W</given-names> </name><name name-style="western"><surname>Zhu</surname><given-names>H</given-names> </name></person-group><article-title>Quality of government health data in COVID-19: definition and testing of an open government health data quality evaluation framework</article-title><source>LHT</source><year>2022</year><month>03</month><day>29</day><volume>40</volume><issue>2</issue><fpage>516</fpage><lpage>534</lpage><pub-id pub-id-type="doi">10.1108/LHT-04-2021-0126</pub-id></nlm-citation></ref><ref id="ref36"><label>36</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>B&#x00FC;nz</surname><given-names>S</given-names> </name><name name-style="western"><surname>Harkener</surname><given-names>S</given-names> </name><name name-style="western"><surname>Stausberg</surname><given-names>J</given-names> </name></person-group><article-title>Data quality and diversity in health care and health research</article-title><source>Stud Health Technol Inform</source><year>2025</year><month>05</month><day>15</day><volume>327</volume><fpage>442</fpage><lpage>446</lpage><pub-id pub-id-type="doi">10.3233/SHTI250376</pub-id><pub-id 
pub-id-type="medline">40380486</pub-id></nlm-citation></ref><ref id="ref37"><label>37</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>James</surname><given-names>SL</given-names> </name><name name-style="western"><surname>Bourgognon</surname><given-names>M</given-names> </name><name name-style="western"><surname>Vieira</surname><given-names>PP</given-names> </name><etal/></person-group><article-title>R-index: a standardized representativeness metric for benchmarking diversity, equity, and inclusion in biopharmaceutical clinical trial development</article-title><source>EClinicalMedicine</source><year>2025</year><month>02</month><volume>80</volume><fpage>103079</fpage><pub-id pub-id-type="doi">10.1016/j.eclinm.2025.103079</pub-id><pub-id pub-id-type="medline">39968390</pub-id></nlm-citation></ref><ref id="ref38"><label>38</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hogan</surname><given-names>WR</given-names> </name><name name-style="western"><surname>Wagner</surname><given-names>MM</given-names> </name></person-group><article-title>Accuracy of data in computer-based patient records</article-title><source>J Am Med Inform Assoc</source><year>1997</year><volume>4</volume><issue>5</issue><fpage>342</fpage><lpage>355</lpage><pub-id pub-id-type="doi">10.1136/jamia.1997.0040342</pub-id><pub-id pub-id-type="medline">9292840</pub-id></nlm-citation></ref><ref id="ref39"><label>39</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Logan</surname><given-names>JR</given-names> </name><name name-style="western"><surname>Gorman</surname><given-names>PN</given-names> </name><name name-style="western"><surname>Middleton</surname><given-names>B</given-names> </name></person-group><article-title>Measuring the quality of medical records: a method for comparing 
completeness and correctness of clinical encounter data</article-title><source>Proc AMIA Symp</source><year>2001</year><fpage>408</fpage><lpage>412</lpage><pub-id pub-id-type="medline">11825220</pub-id></nlm-citation></ref><ref id="ref40"><label>40</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Giesa</surname><given-names>N</given-names> </name><name name-style="western"><surname>Akguel</surname><given-names>M</given-names> </name><name name-style="western"><surname>Boie</surname><given-names>SD</given-names> </name><name name-style="western"><surname>Balzer</surname><given-names>F</given-names> </name></person-group><article-title>GRU-D characterizes age-specific temporal missingness in MIMIC-IV</article-title><source>Stud Health Technol Inform</source><year>2025</year><month>05</month><day>15</day><volume>327</volume><fpage>472</fpage><lpage>476</lpage><pub-id pub-id-type="doi">10.3233/SHTI250382</pub-id><pub-id pub-id-type="medline">40380492</pub-id></nlm-citation></ref><ref id="ref41"><label>41</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Woodall</surname><given-names>P</given-names> </name><name name-style="western"><surname>Oberhofer</surname><given-names>M</given-names> </name><name name-style="western"><surname>Borek</surname><given-names>A</given-names> </name></person-group><article-title>A classification of data quality assessment and improvement methods</article-title><source>IJIQ</source><year>2014</year><volume>3</volume><issue>4</issue><fpage>298</fpage><pub-id pub-id-type="doi">10.1504/IJIQ.2014.068656</pub-id></nlm-citation></ref><ref id="ref42"><label>42</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>&#x0160;libar</surname><given-names>B</given-names> </name><name 
name-style="western"><surname>Ore&#x0161;ki</surname><given-names>D</given-names> </name><name name-style="western"><surname>Begi&#x010D;evi&#x0107; Re&#x0111;ep</surname><given-names>N</given-names> </name></person-group><article-title>Importance of the open data assessment: an insight into the (Meta) data quality dimensions</article-title><source>Sage Open</source><year>2021</year><month>04</month><volume>11</volume><issue>2</issue><fpage>11</fpage><pub-id pub-id-type="doi">10.1177/21582440211023178</pub-id></nlm-citation></ref><ref id="ref43"><label>43</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Stausberg</surname><given-names>J</given-names> </name><name name-style="western"><surname>Harkener</surname><given-names>S</given-names> </name><name name-style="western"><surname>Engel</surname><given-names>C</given-names> </name><etal/></person-group><article-title>Cross-registry benchmarking of data quality: lessons learned</article-title><source>Stud Health Technol Inform</source><year>2023</year><month>05</month><day>18</day><volume>302</volume><fpage>167</fpage><lpage>171</lpage><pub-id pub-id-type="doi">10.3233/SHTI230096</pub-id><pub-id pub-id-type="medline">37203640</pub-id></nlm-citation></ref><ref id="ref44"><label>44</label><nlm-citation citation-type="web"><person-group person-group-type="author"><name name-style="western"><surname>Daumas</surname><given-names>J</given-names> </name><name name-style="western"><surname>Sch&#x00E4;fer</surname><given-names>A</given-names> </name><name name-style="western"><surname>Dolanski-Aghamanoukjan</surname><given-names>L</given-names> </name><etal/></person-group><article-title>Deliverable 1.1 specification of the data sets&#x2019; quality and utility label</article-title><source>Zenodo</source><year>2024</year><access-date>2025-12-29</access-date><comment><ext-link ext-link-type="uri" 
xlink:href="https://zenodo.org/records/14937423">https://zenodo.org/records/14937423</ext-link></comment></nlm-citation></ref><ref id="ref45"><label>45</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Peters</surname><given-names>MDJ</given-names> </name><name name-style="western"><surname>Marnie</surname><given-names>C</given-names> </name><name name-style="western"><surname>Tricco</surname><given-names>AC</given-names> </name><etal/></person-group><article-title>Updated methodological guidance for the conduct of scoping reviews</article-title><source>JBI Evid Synth</source><year>2020</year><month>10</month><volume>18</volume><issue>10</issue><fpage>2119</fpage><lpage>2126</lpage><pub-id pub-id-type="doi">10.11124/JBIES-20-00167</pub-id><pub-id pub-id-type="medline">33038124</pub-id></nlm-citation></ref><ref id="ref46"><label>46</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Schuemie</surname><given-names>M</given-names> </name><name name-style="western"><surname>Reps</surname><given-names>J</given-names> </name><name name-style="western"><surname>Black</surname><given-names>A</given-names> </name><etal/></person-group><article-title>Health-Analytics Data to Evidence Suite (HADES): open-source software for observational research</article-title><source>Stud Health Technol Inform</source><year>2024</year><month>01</month><day>25</day><volume>310</volume><fpage>966</fpage><lpage>970</lpage><pub-id pub-id-type="doi">10.3233/SHTI231108</pub-id><pub-id pub-id-type="medline">38269952</pub-id></nlm-citation></ref><ref id="ref47"><label>47</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Richter</surname><given-names>A</given-names> </name><name name-style="western"><surname>Schmidt</surname><given-names>CO</given-names> </name><name 
name-style="western"><surname>Kr&#x00FC;ger</surname><given-names>M</given-names> </name><name name-style="western"><surname>Struckmann</surname><given-names>S</given-names> </name></person-group><article-title>DataquieR: assessment of data quality in epidemiological research</article-title><source>J Open Source Softw</source><year>2021</year><volume>6</volume><issue>61</issue><fpage>3093</fpage><pub-id pub-id-type="doi">10.21105/joss.03093</pub-id></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>Original set of TMF indicators.</p><media xlink:href="jmir_v28i1e90482_app1.docx" xlink:title="DOCX File, 32 KB"/></supplementary-material><supplementary-material id="app2"><label>Multimedia Appendix 2</label><p>Search strings used for Medline, the Cochrane Library, the Web of Science, and Scopus.</p><media xlink:href="jmir_v28i1e90482_app2.docx" xlink:title="DOCX File, 26 KB"/></supplementary-material><supplementary-material id="app3"><label>Multimedia Appendix 3</label><p>PRISMA flow diagram.</p><media xlink:href="jmir_v28i1e90482_app3.png" xlink:title="PNG File, 135 KB"/></supplementary-material><supplementary-material id="app4"><label>Multimedia Appendix 4</label><p>Evidence sources per indicator in category data quality.</p><media xlink:href="jmir_v28i1e90482_app4.docx" xlink:title="DOCX File, 51 KB"/></supplementary-material><supplementary-material id="app5"><label>Multimedia Appendix 5</label><p>Evidence sources per indicator in category metadata quality.</p><media xlink:href="jmir_v28i1e90482_app5.docx" xlink:title="DOCX File, 32 KB"/></supplementary-material><supplementary-material id="app6"><label>Multimedia Appendix 6</label><p>Individual indicator specifications in version 0.8.</p><media xlink:href="jmir_v28i1e90482_app6.pdf" xlink:title="PDF File, 532 KB"/></supplementary-material></app-group></back></article>