<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id><journal-id journal-id-type="publisher-id">jmir</journal-id><journal-id journal-id-type="index">1</journal-id><journal-title>Journal of Medical Internet Research</journal-title><abbrev-journal-title>J Med Internet Res</abbrev-journal-title><issn pub-type="epub">1438-8871</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v27i1e70314</article-id><article-id pub-id-type="doi">10.2196/70314</article-id><article-categories><subj-group subj-group-type="heading"><subject>Original Paper</subject></subj-group></article-categories><title-group><article-title>Methods for Analytical Validation of Novel Digital Clinical Measures: Implementation Feasibility Evaluation Using Real-World Datasets</article-title></title-group><contrib-group><contrib contrib-type="author"><name name-style="western"><surname>Turner</surname><given-names>Simon</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Floden</surname><given-names>Lysbeth</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Simmatis</surname><given-names>Leif</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author"><name 
name-style="western"><surname>Fromy</surname><given-names>Piper</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff4">4</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Langford</surname><given-names>Joss</given-names></name><degrees>BSc</degrees><xref ref-type="aff" rid="aff5">5</xref><xref ref-type="aff" rid="aff6">6</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Daza</surname><given-names>Eric J</given-names></name><degrees>DrPH</degrees><xref ref-type="aff" rid="aff7">7</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Potter</surname><given-names>Andrew</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff8">8</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Troeger</surname><given-names>Kathleen</given-names></name><degrees>MPH</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><collab>the STAGES cohort investigator group</collab><xref ref-type="aff" rid="aff9">9</xref></contrib></contrib-group><aff id="aff1"><institution>Digital Medicine Society</institution><addr-line>Boston</addr-line><addr-line>MA</addr-line><country>United States</country></aff><aff id="aff2"><institution>Quantitative Science, Evinova</institution><addr-line>35 Gatehouse Drive</addr-line><addr-line>Waltham</addr-line><addr-line>MA</addr-line><country>United States</country></aff><aff id="aff3"><institution>Department of Speech-Language Pathology, Temerty Faculty of Medicine, University of Toronto</institution><addr-line>Toronto</addr-line><addr-line>ON</addr-line><country>Canada</country></aff><aff id="aff4"><institution>Seeing Theta</institution><addr-line>Saumur</addr-line><country>France</country></aff><aff id="aff5"><institution>Activinsights Ltd.</institution><addr-line>Kimbolton</addr-line><addr-line>Cambridgeshire</addr-line><country>United Kingdom</country></aff><aff 
id="aff6"><institution>Department of Public Health and Sport Sciences, University of Exeter</institution><addr-line>Exeter</addr-line><country>United Kingdom</country></aff><aff id="aff7"><institution>Stats-of-1</institution><addr-line>Menlo Park</addr-line><addr-line>CA</addr-line><country>United States</country></aff><aff id="aff8"><institution>Division of Biometrics I, Office of Biostatistics, Center for Drug Evaluation and Research, US Food and Drug Administration</institution><addr-line>Silver Spring</addr-line><addr-line>MD</addr-line><country>United States</country></aff><aff id="aff9"><institution>Stanford University</institution><addr-line>Stanford</addr-line><addr-line>CA</addr-line><country>United States</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Coristine</surname><given-names>Andrew</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Bracken-Clarke</surname><given-names>Dara</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Megaritis</surname><given-names>Dimitrios</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Garcia-Gancedo</surname><given-names>Luis</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Lysbeth Floden, PhD, Quantitative Science, Evinova, 35 Gatehouse Drive, Waltham, MA, 02451, United States, 1 (520) 360-3962; <email>libby.floden@evinova.com</email></corresp></author-notes><pub-date pub-type="collection"><year>2025</year></pub-date><pub-date pub-type="epub"><day>17</day><month>11</month><year>2025</year></pub-date><volume>27</volume><elocation-id>e70314</elocation-id><history><date date-type="received"><day>20</day><month>12</month><year>2024</year></date><date date-type="rev-recd"><day>04</day><month>07</month><year>2025</year></date><date 
date-type="accepted"><day>04</day><month>07</month><year>2025</year></date></history><copyright-statement>&#x00A9; Simon Turner, Lysbeth Floden, Leif Simmatis, Piper Fromy, Joss Langford, Eric J Daza, Andrew Potter, Kathleen Troeger, the STAGES cohort investigator group. Originally published in the Journal of Medical Internet Research (<ext-link ext-link-type="uri" xlink:href="https://www.jmir.org">https://www.jmir.org</ext-link>), 17.11.2025. </copyright-statement><copyright-year>2025</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research (ISSN 1438-8871), is properly cited. The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://www.jmir.org/">https://www.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://www.jmir.org/2025/1/e70314"/><abstract><sec><title>Background</title><p>Sensor-based digital health technologies (sDHTs) are increasingly used to support scientific and clinical decision-making. The digital measures (DMs) they generate offer significant potential to accelerate the drug development timeline, decrease clinical trial costs, and improve access to care. However, choosing an appropriate statistical methodology when conducting analytical validation (AV) of a DM is complicated, particularly for novel DMs, for which appropriate, established reference measures (RMs) may not exist. 
More understanding of, and a standardized approach to, AV in these scenarios is needed.</p></sec><sec><title>Objective</title><p>In a prior simulation study, 3 statistical methods were tested for their ability to estimate a simulated relationship between a sDHT-derived DM and several clinical outcome assessment (COA) RMs. The aim of this work was to assess the feasibility of implementation of these methods in real data and to examine the impact of AV study design factors on the relationships estimated.</p></sec><sec sec-type="methods"><title>Methods</title><p>Four real-world datasets, captured using sDHTs, were used to prepare hypothetical AV studies representing a range of scenarios with respect to 3 key study design properties: temporal coherence, construct coherence, and data completeness. The datasets analyzed were as follows: Urban Poor (comparing nighttime awakenings to measures of psychological well-being), STAGES (comparing daily step count to psychological and fatigue measures), mPower (comparing daily smartphone screen taps to measures of function in Parkinson&#x2019;s disease), and Brighten (comparing smartphone communication activity to measures of psychological well-being). For each hypothetical AV study, 3 statistical methods were leveraged: the Pearson correlation coefficient (PCC) between DM and RM; linear regression, comprising simple linear regression (SLR) between DM and RM and multiple linear regression (MLR) between DMs and combinations of RMs; and 2-factor, correlated-factor confirmatory factor analysis (CFA) models. Performance measures were the PCC magnitudes (for PCC), R<sup>2</sup> and adjusted R<sup>2</sup> statistics (for SLR and MLR, respectively), and factor correlations (for CFA).</p></sec><sec sec-type="results"><title>Results</title><p>Most of the CFA models exhibited an acceptable fit according to the majority of the fit statistics employed, and each model was able to estimate a factor correlation. 
For each model, these correlations were greater than or equal to the corresponding PCC in magnitude. Correlations were the strongest in the hypothetical studies with strong temporal and construct coherence.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>The performance of the selected statistical methods shown in this work supports their feasibility when implemented in real-world data. Our findings, in particular, support the use of CFA to assess the relationship between a novel DM and a COA RM. The observed impact of AV study design factors on the relationships estimated allowed the authors to determine practical recommendations for study design in AV of novel DMs. By using a standardized methodology for evaluating novel DMs, sDHT developers, biostatisticians, and clinical researchers can navigate the complex validation landscape more easily, with more certainty, and with more tools at their disposal.</p></sec></abstract><kwd-group><kwd>digital health technologies</kwd><kwd>analytical validation</kwd><kwd>digital medicine</kwd><kwd>confirmatory factor analysis</kwd><kwd>novel digital clinical measures</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>Sensor-based digital health technologies (sDHTs) are increasingly used to support scientific and clinical decision-making. The digital measures (DMs) they generate offer significant benefits, including the potential to accelerate the drug development timeline, decrease clinical trial costs, and improve access to care [<xref ref-type="bibr" rid="ref1">1</xref>]. 
This potential has motivated considerable efforts to expand research into the application of novel digital measures to capture clinically relevant data and establish endpoints that the community has previously been unable to assess using traditional methods of data collection and statistical analysis [<xref ref-type="bibr" rid="ref2">2</xref>,<xref ref-type="bibr" rid="ref3">3</xref>].</p><p>A novel digital measure can be defined as either a measure that has not previously been assessable or an existing measure that is being applied in a new population, environment, or context of use.</p><p>The evaluation of the digital measures derived from sDHTs as fit for purpose is the first step in bringing the value of these technologies to the people who can benefit the most. The well-established V3+ framework [<xref ref-type="bibr" rid="ref4">4</xref>] and its recent extension to include usability [<xref ref-type="bibr" rid="ref5">5</xref>] provide a robust, modular framework for developers and regulators to follow when evaluating measures generated from sDHTs. The V3+ framework states that to support scientific and clinical decision-making, investigators must undertake verification of the sensor(s), usability validation of the sDHT, analytical validation (AV) of any algorithm(s) applied, and clinical validation of a measure of a clinical or functional state in a defined context of use.</p><p>AV represents a critical bridge between initial technology development (ie, verification) and clinical utility (ie, clinical validation). An AV study comprises reporting on the comparison between the output of a novel sDHT&#x2019;s algorithm and 1 or more reference measures (RMs).</p><p>While work exists that has developed standardized methodology for clinical validation [<xref ref-type="bibr" rid="ref6">6</xref>], the same methodology development and standardization is now required for AV. 
Of note, the difficulty in defining the performance requirements and in selecting the appropriate statistical methodology to assess against these requirements is of primary importance.</p><p>This difficulty is magnified when working with novel sDHTs for which appropriate, established RMs may not exist or may have limited applicability. For an example of this limitation, in speech, articulatory function assessed via digital audio recordings is a relatively straightforward measure to analytically validate because there are existing high-quality RMs that can form the basis of comparisons [<xref ref-type="bibr" rid="ref7">7</xref>]. However, for digital cognitive assessments, such comparisons may not be so straightforward as existing RMs may be restricted to instruments such as clinical outcome assessments (COAs) that capture multiple aspects of disease severity as a single semiquantitative score [<xref ref-type="bibr" rid="ref8">8</xref>]. The issue here is that the outputs of the sDHT and the RM do not directly correspond in such situations. This means that traditional analyses such as receiver operating characteristic curves and intraclass correlations are often not possible.</p><p>In a prior simulation study [<xref ref-type="bibr" rid="ref9">9</xref>], several statistical methods were tested for their ability to return a nonbiased estimate of the simulated relationship between an sDHT-derived DM and COA RMs. Simulation studies provide evidence for the feasibility of the methods in ideal situations; however, in data collected in practice, in either clinical or real-world settings, nuances can lead to issues such as model nonconvergence. Here, we examine the implementation of the methods previously examined in simulation, across several real-world datasets with varying data missingness, sample size, and theoretical relationship between the DM and RM. 
The aim of this work was to assess the feasibility of the methods&#x2019; implementation in real data and to examine the impact of AV study design factors on the relationships estimated. As with the prior simulation study [<xref ref-type="bibr" rid="ref9">9</xref>], COAs were used as the RMs in order to evaluate AV study design factors, to reflect situations where they comprise the only available RMs and thus represent the measurement target of interest.</p></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Selection of Datasets</title><p>Four open-access datasets were employed for this research; the Urban Poor dataset [<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref11">11</xref>], the STAGES dataset [<xref ref-type="bibr" rid="ref10">10</xref>], the mPower dataset [<xref ref-type="bibr" rid="ref12">12</xref>], and the Brighten dataset [<xref ref-type="bibr" rid="ref13">13</xref>]. These datasets were selected based on several preferred characteristics:</p><list list-type="bullet"><list-item><p>At least 100 subject records (repeated measures were permitted)</p></list-item><list-item><p>Data captured using a sDHT</p></list-item><list-item><p>At least one sDHT variable (acting as the digital measure) that was:</p><list list-type="bullet"><list-item><p>Collected on seven or more consecutive days</p></list-item><list-item><p>A discrete variable, aggregated as an ordinal variable representing a record of events occurring</p></list-item><list-item><p>Either available as, or able to be summarized as, a daily summary format (eg, number of steps per day)</p></list-item></list></list-item></list><list list-type="bullet"><list-item><p>COAs to act as RMs that:</p><list list-type="bullet"><list-item><p>Assessed a similar construct to the sDHT variable(s)</p></list-item><list-item><p>Assessed each item on a Likert scale</p></list-item><list-item><p>At least 1 COA with a daily recall period and at least 1 COA with a 
multiday recall period</p><list list-type="bullet"><list-item><p>A COA with a daily recall period asks a participant to consider a single day when they answer, such as a global impression of severity [<xref ref-type="bibr" rid="ref14">14</xref>]. Conversely, a COA with a multiday recall period asks a participant to consider more than 1 day; for example, the PHQ-9 [<xref ref-type="bibr" rid="ref15">15</xref>] asks a participant to think about how they have felt over the preceding 2 weeks.</p></list-item></list></list-item></list></list-item></list><p>These characteristics were chosen to allow us to construct hypothetical AV studies in keeping with the V3+ framework, while respecting the prerequisite requirements for each chosen statistical method to function robustly. The 4 datasets selected represented a variety of quality in terms of key properties of an AV study design: temporal coherence, construct coherence, and data completeness (<xref ref-type="other" rid="box1">Textbox 1</xref>). The datasets selected also represent the best matches available that met most of the COA characteristics. 
<xref ref-type="table" rid="table1">Table 1</xref> summarizes the key properties of each of the 4 selected datasets.</p><boxed-text id="box1"><title> Analytical Validation Study Design Qualities.</title><p>Certain aspects of study design offer the best opportunity to observe a relationship between a digital measure and a reference measure, where such a relationship exists.</p><p>These include the following:</p><list list-type="bullet"><list-item><p><bold>Temporal coherence:</bold> the similarity between the periods of data collection for the measures.</p></list-item><list-item><p><bold>Construct coherence:</bold> the similarity between the theoretical underlying constructs being assessed by the measures.</p></list-item><list-item><p><bold>Data completeness:</bold> the level of data completeness in both the digital measure and reference measure data. Study design should have a strategy to maximize data completeness.</p></list-item></list></boxed-text><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Summary of investigated datasets<sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup></p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Title</td><td align="left" valign="bottom">Usable sample size</td><td align="left" valign="bottom">Digital measure(s)</td><td align="left" valign="bottom">Reference measure(s)</td><td align="left" valign="bottom">Coherence characteristics</td></tr></thead><tbody><tr><td align="left" valign="top">Urban Poor</td><td align="left" valign="top">452</td><td align="left" valign="top">Number of awakenings during an entire night</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Rosenberg Self-Esteem Scale [<xref ref-type="bibr" rid="ref16">16</xref>]</p></list-item><list-item><p>Generalized Anxiety Disorder Questionnaire (GAD-7) [<xref ref-type="bibr" rid="ref17">17</xref>]</p></list-item><list-item><p>Patient Health Questionnaire (PHQ-9) 
[<xref ref-type="bibr" rid="ref15">15</xref>]</p></list-item><list-item><p>Daily single-item patient global impression of happiness [<xref ref-type="bibr" rid="ref11">11</xref>]</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Weak construct coherence (digital measure of sleep, reference measures of psychological well-being)</p></list-item><list-item><p>Weak temporal coherence (multiday recall reference measures collected at baseline, before digital measure data collection; interventional study creates a potential change in the state of the underlying construct being assessed)</p></list-item></list></td></tr><tr><td align="left" valign="top">STAGES</td><td align="left" valign="top">964</td><td align="left" valign="top">Daily step count</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Fatigue Severity Score (FSS) [<xref ref-type="bibr" rid="ref18">18</xref>]</p></list-item><list-item><p>Generalized Anxiety Disorder Questionnaire (GAD-7) [<xref ref-type="bibr" rid="ref17">17</xref>]</p></list-item><list-item><p>Patient Health Questionnaire (PHQ-9) [<xref ref-type="bibr" rid="ref15">15</xref>]</p></list-item><list-item><p>Nasal Obstruction Symptom Evaluation (NOSE) [<xref ref-type="bibr" rid="ref19">19</xref>]</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Weak construct coherence (digital measure of physical activity, reference measures of fatigue, psychological well-being, and breathing obstruction).</p></list-item><list-item><p>Weak temporal coherence (reference measures were collected at inconsistent times during the study with respect to the digital measure data collection).</p></list-item></list></td></tr><tr><td align="left" valign="top">mPower</td><td align="left" valign="top">1641</td><td align="left" valign="top">No. 
of smartphone screen taps during a daily tapping activity</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Selected questions from the Movement Disorder Society Unified Parkinson Disease Rating Scale (UPDRS) [<xref ref-type="bibr" rid="ref20">20</xref>]</p></list-item><list-item><p>Parkinson Disease Questionnaire (shortened version) (PDQ-8) [<xref ref-type="bibr" rid="ref21">21</xref>]</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Moderate-to-strong construct coherence (all measures targeted Parkinson disease, but both reference measures had broader scope than the digital measure).</p></list-item><list-item><p>Strong temporal coherence with minimal missing data.</p></list-item></list></td></tr><tr><td align="left" valign="top">Brighten</td><td align="left" valign="top">89</td><td align="left" valign="top">Three variables from daily passive smartphone communications data:<list list-type="bullet"><list-item><p>Unique numbers from incoming calls</p></list-item><list-item><p>Unique numbers from outgoing calls</p></list-item><list-item><p>Unique numbers from texts received</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Patient Health Questionnaire (PHQ-9) [<xref ref-type="bibr" rid="ref15">15</xref>]</p></list-item><list-item><p>Two-item daily version of the PHQ-9 (PHQ-2) [<xref ref-type="bibr" rid="ref22">22</xref>]</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Moderate-to-weak construct coherence (Data are not adjusted for a subject&#x2019;s normal behavioral habits)</p></list-item><list-item><p>Moderate-to-strong temporal coherence (digital measure data from the full recall period of the PHQ-9 were analyzed, although there was substantial reference measure data missingness).</p></list-item></list></td></tr></tbody></table><table-wrap-foot><fn id="table1fn1"><p><sup>a</sup>A full description of the 
datasets analyzed can be found in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>. Some of the datasets did not meet all the preferred characteristics. The Brighten data have a usable sample size less than 100; while there is a sufficient sample size (accounting for repeated measures) reported in the original study [<xref ref-type="bibr" rid="ref13">13</xref>], the distribution of data missingness led to excluding many records in our analysis. Furthermore, the STAGES and mPower data lack applicable reference measures with daily recall periods.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s2-2"><title>Statistical Methods</title><sec id="s2-2-1"><title>Data Preparation</title><p>For each dataset, we prepared each measure&#x2019;s data for analysis via the following steps. Each step involved selecting, subsetting, or otherwise processing data values.</p></sec><sec id="s2-2-2"><title>Multiday Recall RM Data Selection</title><p>For each study participant, each RM administration instance (ie, instance of an RM being administered) was included for analysis and considered repeated measures. Thus, if a participant answered an RM 3 times during the study period, all 3 responses were used in analysis.</p><p>For each instance, the raw scores for the individual items were aggregated per participant by summing and then linearly scaling them to fit a scale ranging from 0 to 100. For example, the PHQ-9 measure is a 9-item PRO with each item response scored on a 0-3 scale [<xref ref-type="bibr" rid="ref15">15</xref>]. For each participant, raw scores were first summed, and the result was multiplied by 100/27 (analogous to the process of converting a raw score to a percentage). 
RM data values already on a 0-100 scale were assumed to be ready for analysis and were not modified.</p></sec><sec id="s2-2-3"><title>Digital Measure Data Selection</title><p>For each study participant and for each multiday recall RM instance, we analyzed digital measure data that corresponded to the recall period of the RM. For example, the PHQ-9 has a recall period of 2 weeks. Thus, if a participant answered the PHQ-9 on January 14, then only digital measure data values from January 1 to January 14 inclusive were used in the analysis.</p><p>From this subset of digital measure data, we selected the 7 days of data closest to the RM administration instance. The 7-day criterion has been shown to be sufficient to achieve reliable data across a spectrum of populations and contexts of use [<xref ref-type="bibr" rid="ref23">23</xref>-<xref ref-type="bibr" rid="ref25">25</xref>]. Continuing the above example, if digital measure data were captured on all 14 days of the PHQ-9 recall period, then the 7 days of data selected for analysis would be January 8-January 14. If fewer than 7 days of digital measure data were observed during the RM recall period, then all such days were used in the analysis; all data values on the remaining days were treated as missing.</p></sec><sec id="s2-2-4"><title>Daily RM Data Selection</title><p>For each study participant, we analyzed daily RM data that corresponded to the selected digital measure data. Continuing the above example, the 7 or fewer days of daily RM data selected for this participant would come from the period of January 8-January 14 inclusive. 
If daily RM data were not recorded on some days in this window, then these data values were treated as missing.</p></sec><sec id="s2-2-5"><title>Further Processing of the Digital Measure Data and Daily RM Data</title><p>To properly deploy the full range of statistical methods for modeling and factor analysis, data values of the digital and daily RMs needed to be aggregated to match the administration cadence of the multiday recall RMs. This was accomplished by calculating the mean of all observed data values at each administration instance of a multiday recall RM, for each participant.</p><p>Continuing the above example, we would calculate a study participant&#x2019;s mean digital measure &#x201C;score&#x201D; (ie, mean data value) over the period of January 8-January 14, inclusive. Likewise, we would calculate the mean daily RM score from the same January 8-January 14 window.</p></sec></sec><sec id="s2-3"><title>Data Analysis</title><p><xref ref-type="table" rid="table2">Table 2</xref> presents a summary of the statistical approaches used in this work.</p><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Summary of statistical methods and evaluation criteria.</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Analysis</td><td align="left" valign="bottom">Type</td><td align="left" valign="bottom">Description</td><td align="left" valign="bottom">Evaluation criteria</td></tr></thead><tbody><tr><td align="left" valign="top">PCC<sup><xref ref-type="table-fn" rid="table2fn1">a</xref></sup></td><td align="left" valign="top">Correlation</td><td align="left" valign="top">PCC between DM<sup><xref ref-type="table-fn" rid="table2fn2">b</xref></sup> and individual RMs<sup><xref ref-type="table-fn" rid="table2fn3">c</xref></sup>.</td><td align="left" valign="top">The magnitude and sign of the PCC.</td></tr><tr><td align="left" valign="top">SLR<sup><xref ref-type="table-fn" 
rid="table2fn4">d</xref></sup></td><td align="left" valign="top">Regression</td><td align="left" valign="top">SLR between DM and individual RMs.</td><td align="left" valign="top">Coefficient of determination (R<sup>2</sup>).</td></tr><tr><td align="left" valign="top">MLR<sup><xref ref-type="table-fn" rid="table2fn6">f</xref></sup></td><td align="left" valign="top">Regression</td><td align="left" valign="top">MLR between DM and combinations of individual RMs.</td><td align="left" valign="top">Adjusted coefficient of determination (R<sup>2</sup>).</td></tr><tr><td align="left" valign="top">CFA<sup><xref ref-type="table-fn" rid="table2fn5">e</xref></sup></td><td align="left" valign="top">Factor analysis</td><td align="left" valign="top">Two-factor confirmatory factor analysis of combinations of DM and RM data, modeled with correlations between latent factors.</td><td align="left" valign="top">CFI,<sup><xref ref-type="table-fn" rid="table2fn7">g</xref></sup> TLI<sup><xref ref-type="table-fn" rid="table2fn8">h</xref></sup>, RMSEA,<sup><xref ref-type="table-fn" rid="table2fn9">i</xref></sup> SRMR<sup><xref ref-type="table-fn" rid="table2fn10">j</xref></sup></td></tr></tbody></table><table-wrap-foot><fn id="table2fn1"><p><sup>a</sup>PCC: Pearson correlation coefficient.</p></fn><fn id="table2fn2"><p><sup>b</sup>DM: digital measure.</p></fn><fn id="table2fn3"><p><sup>c</sup>RM: reference measure.</p></fn><fn id="table2fn4"><p><sup>d</sup>SLR: simple linear regression.</p></fn><fn id="table2fn5"><p><sup>e</sup>CFA: confirmatory factor analysis.</p></fn><fn id="table2fn6"><p><sup>f</sup>MLR: multiple linear regression.</p></fn><fn id="table2fn7"><p><sup>g</sup>CFI: comparative fit index.</p></fn><fn id="table2fn8"><p><sup>h</sup>TLI: Tucker&#x2013;Lewis index.</p></fn><fn id="table2fn9"><p><sup>i</sup>RMSEA: root mean square error of approximation.</p></fn><fn id="table2fn10"><p><sup>j</sup>SRMR: standardized root mean square 
residual.</p></fn></table-wrap-foot></table-wrap><p>Pearson correlation coefficients (PCCs), confirmatory factor analysis (CFA), and linear regression were used to analyze each dataset, following the same methodology in each case. A full description of the data analysis methods can be found in <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>; a summary of the methods appears below.</p><p>In each dataset, PCCs were calculated between each digital measure and each multiday recall RM.</p><p>Two-factor, correlated-factor CFA models were created for each combination of digital measure and multiday recall RM. CFA was selected, given its ability to model measurement error more explicitly than PCC as well as its insensitivity to scale differences (due to factors being computed from correlations, removing the influence of input variable scale), which we anticipated may be a useful property when dealing with measures containing multiple items/measures collected across sessions. It is additionally able to handle a range of measurement units/data types (continuous, ordinal, etc), which makes it well-suited to the problem of dealing with questionnaire data as well as sensor-derived data [<xref ref-type="bibr" rid="ref26">26</xref>,<xref ref-type="bibr" rid="ref27">27</xref>]. The correlation between the factors was calculated and used as the estimate of the relationship between the DM and RM. Four model fit statistics were computed for each model: Comparative Fit Index (CFI), Tucker&#x2013;Lewis Index (TLI), root mean square error of approximation (RMSEA), and standardized root mean square residual (SRMR). 
The fit statistics were evaluated against the following thresholds to determine if each model was an acceptable fit to the data [<xref ref-type="bibr" rid="ref28">28</xref>,<xref ref-type="bibr" rid="ref29">29</xref>]: CFI and TLI acceptable fit: values&#x2265;0.9, and RMSEA and SRMR acceptable fit: values&#x003C;0.08.</p><p>Simple linear regression (SLR) models were created to model the relationship between the digital measures and each multiday recall RM. Multiple linear regression (MLR) models were created to model the relationship between each digital measure and every combination of daily and multiday recall RMs available. R<sup>2</sup> values were calculated for each model.</p><p>All analyses were performed using R statistical software v4.1.2 [<xref ref-type="bibr" rid="ref30">30</xref>] along with several additional packages. The additional packages include the following: dplyr, readxl, stringr, and lubridate for data preparation; and lavaan and tibble for data analysis.</p><p>All packages were used in their latest versions as of September 2024.</p></sec><sec id="s2-4"><title>Ethical Considerations</title><p>This study is a secondary use of data that are publicly available and have undergone institutional review board (IRB) review(s). Brief details of data access and ethical reviews undertaken by the teams that prepared each dataset are provided below.</p><p>The Urban Poor dataset is licensed under CC0 1.0 (public domain). Participants in this study provided informed consent, including information on the specific data collection methods used. 
Hypotheses of the study were not shared with the participants, but the study was described to them as work to understand the &#x201C;difficulties underprivileged people in India face, and how these problems affect their lives&#x201D; [<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref11">11</xref>].</p><p>Data from the STAGES dataset are published openly on the National Sleep Research Resource for commercial and noncommercial use by the STAGES study team. Data use agreements were sought by the STAGES study team with individual research institutions to ensure compliance with specific IRBs&#x2019; policies. Detailed ethics and consent procedures are available as part of the open data release package [<xref ref-type="bibr" rid="ref10">10</xref>].</p><p>Coded data from the mPower dataset are published openly on Synapse. E-consent was obtained from study participants before analysis and data sharing, including a distinction between &#x201C;narrow&#x201D; data sharing (ie, with only the mPower study team) and open data sharing among the broader research community. Ethical oversight of the study was provided by Western IRB [<xref ref-type="bibr" rid="ref12">12</xref>].</p><p>Data from the Brighten dataset are publicly available via Synapse. Informed consent was obtained before enrollment in the study. 
Ethical approval for the original study data collection was obtained via the University of California (San Francisco) Committee for Human Research [<xref ref-type="bibr" rid="ref13">13</xref>].</p><p>Additionally, no identification of individual participants is possible from our use of the datasets in our hypothetical AV studies.</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><p>The results are presented in two parts: first, the functioning of the methods, and, second, the results arising from those methods, ie, the relationships between the measures that were estimated.</p><sec id="s3-1"><title>Functioning of the Methods</title><p>In each dataset, results were successfully obtained for each of the methods investigated, and, in particular, each of the CFA models converged, which indicates that our chosen models can be fitted to the data.</p></sec><sec id="s3-2"><title>CFA Model Fit</title><p>Using the thresholds of acceptable fit detailed above, the model fit statistics suggested that the models in the Urban Poor, STAGES, and mPower datasets had an acceptable fit (<xref ref-type="table" rid="table3">Tables 3</xref><xref ref-type="table" rid="table4"/>-<xref ref-type="table" rid="table5">5</xref>). 
In the Brighten dataset, the fit statistics were less clear, returning a mixed acceptability of the fit between each of the 4 calculated fit statistics (<xref ref-type="table" rid="table6">Table 6</xref>).</p><table-wrap id="t3" position="float"><label>Table 3.</label><caption><p>Urban Poor CFA fit statistics.</p></caption><table id="table3" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom" rowspan="2">Reference measure</td><td align="left" valign="bottom" colspan="4">CFA<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup> model fit measure<sup><xref ref-type="table-fn" rid="table3fn2">b</xref></sup></td></tr><tr><td align="left" valign="bottom">CFI<sup><xref ref-type="table-fn" rid="table3fn3">c</xref></sup></td><td align="left" valign="bottom">TLI<sup><xref ref-type="table-fn" rid="table3fn4">d</xref></sup></td><td align="left" valign="bottom">RMSEA<sup><xref ref-type="table-fn" rid="table3fn5">e</xref></sup></td><td align="left" valign="bottom">SRMR<sup><xref ref-type="table-fn" rid="table3fn6">f</xref></sup></td></tr></thead><tbody><tr><td align="left" valign="top">Rosenberg<sup><xref ref-type="table-fn" rid="table3fn7">g</xref></sup></td><td align="left" valign="top">0.913</td><td align="left" valign="top">0.900</td><td align="left" valign="top">0.081</td><td align="left" valign="top">0.079</td></tr><tr><td align="left" valign="top">GAD-7<sup><xref ref-type="table-fn" rid="table3fn8">h</xref></sup></td><td align="left" valign="top">1.000</td><td align="left" valign="top">1.000</td><td align="left" valign="top">0.000</td><td align="left" valign="top">0.034</td></tr><tr><td align="left" valign="top">PHQ-9<sup><xref ref-type="table-fn" rid="table3fn9">i</xref></sup></td><td align="left" valign="top">0.994</td><td align="left" valign="top">0.993</td><td align="left" valign="top">0.024</td><td align="left" valign="top">0.042</td></tr></tbody></table><table-wrap-foot><fn id="table3fn1"><p><sup>a</sup>CFA:  Confirmatory Factor 
Analysis.</p></fn><fn id="table3fn2"><p><sup>b</sup>CFI and TLI acceptable fit: &#x2265; 0.90, RMSEA and SRMR acceptable fit: &#x003C; 0.08.</p></fn><fn id="table3fn3"><p><sup>c</sup>CFI:  Comparative Fit Index.</p></fn><fn id="table3fn4"><p><sup>d</sup>TLI:  Tucker-Lewis Index.</p></fn><fn id="table3fn5"><p><sup>e</sup>RMSEA: Root Mean Square Error of Approximation.</p></fn><fn id="table3fn6"><p><sup>f</sup>SRMR:  Standardized Root Mean Square Residual. </p></fn><fn id="table3fn7"><p><sup>g</sup>Rosenberg:  Rosenberg Self-Esteem Scale.</p></fn><fn id="table3fn8"><p><sup>h</sup>GAD-7:  Generalized Anxiety Disorder Questionnaire.</p></fn><fn id="table3fn9"><p><sup>i</sup>PHQ-9:  Patient Health Questionnaire-9.</p></fn></table-wrap-foot></table-wrap><table-wrap id="t4" position="float"><label>Table 4.</label><caption><p>STAGES CFA fit statistics.</p></caption><table id="table4" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom" rowspan="2">Reference measure</td><td align="left" valign="bottom" colspan="4">CFA<sup><xref ref-type="table-fn" rid="table4fn1">a</xref></sup> model fit measure<sup><xref ref-type="table-fn" rid="table4fn2">b</xref></sup></td></tr><tr><td align="left" valign="bottom">CFI<sup><xref ref-type="table-fn" rid="table4fn3">c</xref></sup></td><td align="left" valign="bottom">TLI<sup><xref ref-type="table-fn" rid="table4fn4">d</xref></sup></td><td align="left" valign="bottom">RMSEA<sup><xref ref-type="table-fn" rid="table4fn5">e</xref></sup></td><td align="left" valign="bottom">SRMR<sup><xref ref-type="table-fn" rid="table4fn6">f</xref></sup></td></tr></thead><tbody><tr><td align="left" valign="top">FSS<sup><xref ref-type="table-fn" rid="table4fn7">g</xref></sup></td><td align="left" valign="top">0.997</td><td align="left" valign="top">0.996</td><td align="left" valign="top">0.223</td><td align="left" valign="top">0.043</td></tr><tr><td align="left" valign="top">GAD-7<sup><xref ref-type="table-fn" 
rid="table4fn8">h</xref></sup></td><td align="left" valign="top">0.997</td><td align="left" valign="top">0.996</td><td align="left" valign="top">0.255</td><td align="left" valign="top">0.037</td></tr><tr><td align="left" valign="top">PHQ-9<sup><xref ref-type="table-fn" rid="table4fn9">i</xref></sup></td><td align="left" valign="top">0.996</td><td align="left" valign="top">0.996</td><td align="left" valign="top">0.238</td><td align="left" valign="top">0.061</td></tr><tr><td align="left" valign="top">NOSE<sup><xref ref-type="table-fn" rid="table4fn10">j</xref></sup></td><td align="left" valign="top">0.997</td><td align="left" valign="top">0.996</td><td align="left" valign="top">0.314</td><td align="left" valign="top">0.063</td></tr></tbody></table><table-wrap-foot><fn id="table4fn1"><p><sup>a</sup>CFA:  Confirmatory Factor Analysis.</p></fn><fn id="table4fn2"><p><sup>b</sup>CFI and TLI acceptable fit: &#x2265; 0.90, RMSEA and SRMR acceptable fit: &#x003C; 0.08.</p></fn><fn id="table4fn3"><p><sup>c</sup>CFI: Comparative Fit Index.</p></fn><fn id="table4fn4"><p><sup>d</sup>TLI:  Tucker-Lewis Index.</p></fn><fn id="table4fn5"><p><sup>e</sup>RMSEA:  Root Mean Square Error of Approximation.</p></fn><fn id="table4fn6"><p><sup>f</sup>SRMR:  Standardized Root Mean Square Residual.</p></fn><fn id="table4fn7"><p><sup>g</sup>FSS:  Fatigue Severity Score.</p></fn><fn id="table4fn8"><p><sup>h</sup>GAD-7:  Generalized Anxiety Disorder Questionnaire.</p></fn><fn id="table4fn9"><p><sup>i</sup>PHQ-9:  Patient Health Questionnaire-9.</p></fn><fn id="table4fn10"><p><sup>j</sup>NOSE:  Nasal Obstruction Symptom Evaluation.</p></fn></table-wrap-foot></table-wrap><table-wrap id="t5" position="float"><label>Table 5.</label><caption><p>mPower CFA fit statistics.</p></caption><table id="table5" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom" rowspan="2">Reference measure</td><td align="left" valign="bottom" colspan="4">CFA<sup><xref ref-type="table-fn" 
rid="table5fn1">a</xref></sup> model fit measure<sup><xref ref-type="table-fn" rid="table5fn2">b</xref></sup></td></tr><tr><td align="left" valign="bottom">CFI<sup><xref ref-type="table-fn" rid="table5fn3">c</xref></sup></td><td align="left" valign="bottom">TLI<sup><xref ref-type="table-fn" rid="table5fn4">d</xref></sup></td><td align="left" valign="bottom">RMSEA<sup><xref ref-type="table-fn" rid="table5fn5">e</xref></sup></td><td align="left" valign="bottom">SRMR<sup><xref ref-type="table-fn" rid="table5fn6">f</xref></sup></td></tr></thead><tbody><tr><td align="left" valign="top">UPDRS<sup><xref ref-type="table-fn" rid="table5fn7">g</xref></sup></td><td align="left" valign="top">1.000</td><td align="left" valign="top">1.004</td><td align="left" valign="top">0.000</td><td align="left" valign="top">0.060</td></tr><tr><td align="left" valign="top">PDQ-8<sup><xref ref-type="table-fn" rid="table5fn8">h</xref></sup></td><td align="left" valign="top">0.957</td><td align="left" valign="top">0.953</td><td align="left" valign="top">0.067</td><td align="left" valign="top">0.088</td></tr></tbody></table><table-wrap-foot><fn id="table5fn1"><p><sup>a</sup>CFA:  Confirmatory Factor Analysis.</p></fn><fn id="table5fn2"><p><sup>b</sup>CFI and TLI acceptable fit: &#x2265; 0.90, RMSEA and SRMR acceptable fit: &#x003C; 0.08.</p></fn><fn id="table5fn3"><p><sup>c</sup>CFI: Comparative Fit Index.</p></fn><fn id="table5fn4"><p><sup>d</sup>TLI: Tucker-Lewis Index.</p></fn><fn id="table5fn5"><p><sup>e</sup>RMSEA:  Root Mean Square Error of Approximation.</p></fn><fn id="table5fn6"><p><sup>f</sup>SRMR: Standardized Root Mean Square Residual.</p></fn><fn id="table5fn7"><p><sup>g</sup>UPDRS: Movement Disorder Society Unified Parkinson&#x2019;s Disease Rating Scale (selected questions). 
</p></fn><fn id="table5fn8"><p><sup>h</sup>PDQ-8: Parkinson&#x2019;s Disease Questionnaire (shortened version).</p></fn></table-wrap-foot></table-wrap><table-wrap id="t6" position="float"><label>Table 6.</label><caption><p>Brighten CFA fit statistics.<sup><xref ref-type="table-fn" rid="table6fn1">a</xref></sup></p></caption><table id="table6" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom" rowspan="2">Digital measure</td><td align="left" valign="bottom" colspan="4">CFA<sup><xref ref-type="table-fn" rid="table6fn2">b</xref></sup> model fit measure<sup><xref ref-type="table-fn" rid="table6fn3">c</xref></sup></td></tr><tr><td align="left" valign="bottom">CFI<sup><xref ref-type="table-fn" rid="table6fn4">d</xref></sup></td><td align="left" valign="bottom">TLI<sup><xref ref-type="table-fn" rid="table6fn5">e</xref></sup></td><td align="left" valign="bottom">RMSEA<sup><xref ref-type="table-fn" rid="table6fn6">f</xref></sup></td><td align="left" valign="bottom">SRMR<sup><xref ref-type="table-fn" rid="table6fn7">g</xref></sup></td></tr></thead><tbody><tr><td align="left" valign="top">Unique numbers calls incoming</td><td align="left" valign="top">0.906</td><td align="left" valign="top">0.890</td><td align="left" valign="top">0.151</td><td align="left" valign="top">0.106</td></tr><tr><td align="left" valign="top">Unique numbers call outgoing</td><td align="left" valign="top">0.965</td><td align="left" valign="top">0.959</td><td align="left" valign="top">0.504</td><td align="left" valign="top">0.131</td></tr><tr><td align="left" valign="top">Unique numbers texts received</td><td align="left" valign="top">0.968</td><td align="left" valign="top">0.963</td><td align="left" valign="top">0.311</td><td align="left" valign="top">0.121</td></tr></tbody></table><table-wrap-foot><fn id="table6fn1"><p><sup>a</sup>All statistics use the Patient Health Questionnaire-9 reference measure.</p></fn><fn id="table6fn2"><p><sup>b</sup>CFA: Confirmatory Factor 
Analysis.</p></fn><fn id="table6fn3"><p><sup>c</sup>CFI and TLI acceptable fit: &#x2265; 0.90, RMSEA and SRMR acceptable fit: &#x003C; 0.08.</p></fn><fn id="table6fn4"><p><sup>d</sup>CFI: Comparative Fit Index.</p></fn><fn id="table6fn5"><p><sup>e</sup>TLI: Tucker-Lewis Index.</p></fn><fn id="table6fn6"><p><sup>f</sup>RMSEA: Root Mean Square Error of Approximation.</p></fn><fn id="table6fn7"><p><sup>g</sup>SRMR:  Standardized Root Mean Square Residual.</p></fn></table-wrap-foot></table-wrap><p>The results were examined in more detail. When assessed using the CFI, each CFA model in each of the 4 datasets had an acceptable fit.</p><p>When assessed using TLI, all the CFA models had an acceptable fit, except for one of the 3 models built for the Brighten data.</p><p>When assessed using SRMR, there was agreement with CFI and TLI in the Urban Poor and STAGES datasets&#x2014;the fit was acceptable in each model in these datasets. However, when assessing the Brighten models, SRMR deemed each of the models to have an unacceptable fit, in contrast to the assessment from CFI and TLI. When assessing the mPower models, the UPDRS model had an acceptable fit, but the PDQ-8 model did not.</p><p>When assessed using RMSEA, each model in the STAGES and Brighten datasets had an unacceptable fit. In the Urban Poor dataset, the CFA models using GAD-7 and PHQ-9 as the RM were deemed to be an acceptable fit according to RMSEA; however, the model fit when using the Rosenberg Self-Esteem scale as the RM was unacceptable. In the mPower dataset, all models had an acceptable fit according to RMSEA.</p></sec><sec id="s3-3"><title>Relationships Estimated</title><sec id="s3-3-1"><title>Correlations</title><p>The magnitude of the calculated correlations (<xref ref-type="table" rid="table7">Tables 7</xref><xref ref-type="table" rid="table8"/><xref ref-type="table" rid="table9"/>-<xref ref-type="table" rid="table10">10</xref>) varied depending on the dataset and the choice of digital measures and RMs. 
In the Urban Poor data, all the estimated relationships were negligible (maximum magnitude 0.052, minimum magnitude 0.001); in the STAGES data, the magnitude of the relationships varied between 0.087 and 0.180. Larger relationships were observed in the Brighten data (maximum magnitude 0.175 and 0.340 for Pearson correlation and CFA correlation, respectively) and mPower data (maximum magnitude 0.329 for both types of correlation).</p><table-wrap id="t7" position="float"><label>Table 7.</label><caption><p>Urban Poor correlation values.</p></caption><table id="table7" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Reference measure</td><td align="left" valign="bottom">Pearson correlation</td><td align="left" valign="bottom">CFA<sup><xref ref-type="table-fn" rid="table7fn1">a</xref></sup> factor correlation</td></tr></thead><tbody><tr><td align="left" valign="top">Rosenberg<sup><xref ref-type="table-fn" rid="table7fn2">b</xref></sup></td><td align="left" valign="top">0.001</td><td align="left" valign="top">&#x2212;0.028</td></tr><tr><td align="left" valign="top">GAD-7<sup><xref ref-type="table-fn" rid="table7fn3">c</xref></sup></td><td align="left" valign="top">&#x2212;0.032</td><td align="left" valign="top">&#x2212;0.052</td></tr><tr><td align="left" valign="top">PHQ-9<sup><xref ref-type="table-fn" rid="table7fn4">d</xref></sup></td><td align="left" valign="top">&#x2212;0.021</td><td align="left" valign="top">&#x2212;0.022</td></tr></tbody></table><table-wrap-foot><fn id="table7fn1"><p><sup>a</sup>CFA: Confirmatory Factor Analysis.</p></fn><fn id="table7fn2"><p><sup>b</sup>Rosenberg: Rosenberg Self-Esteem Scale.</p></fn><fn id="table7fn3"><p><sup>c</sup>GAD-7: Generalized Anxiety Disorder Questionnaire.</p></fn><fn id="table7fn4"><p><sup>d</sup>PHQ-9: Patient Health Questionnaire-9.</p></fn></table-wrap-foot></table-wrap><table-wrap id="t8" position="float"><label>Table 8.</label><caption><p>STAGES correlation 
values.</p></caption><table id="table8" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Reference measure</td><td align="left" valign="bottom">Pearson correlation</td><td align="left" valign="bottom">CFA<sup><xref ref-type="table-fn" rid="table8fn1">a</xref></sup> factor correlation</td></tr></thead><tbody><tr><td align="left" valign="top">FSS<sup><xref ref-type="table-fn" rid="table8fn2">b</xref></sup></td><td align="left" valign="top">&#x2212;0.178</td><td align="left" valign="top">&#x2212;0.180</td></tr><tr><td align="left" valign="top">GAD-7<sup><xref ref-type="table-fn" rid="table8fn3">c</xref></sup></td><td align="left" valign="top">&#x2212;0.087</td><td align="left" valign="top">&#x2212;0.099</td></tr><tr><td align="left" valign="top">PHQ-9<sup><xref ref-type="table-fn" rid="table8fn4">d</xref></sup></td><td align="left" valign="top">&#x2212;0.161</td><td align="left" valign="top">&#x2212;0.175</td></tr><tr><td align="left" valign="top">NOSE<sup><xref ref-type="table-fn" rid="table8fn5">e</xref></sup></td><td align="left" valign="top">&#x2212;0.109</td><td align="left" valign="top">&#x2212;0.120</td></tr></tbody></table><table-wrap-foot><fn id="table8fn1"><p><sup>a</sup>CFA: Confirmatory Factor Analysis.</p></fn><fn id="table8fn2"><p><sup>b</sup>FSS: Fatigue Severity Score. 
</p></fn><fn id="table8fn3"><p><sup>c</sup>GAD-7: Generalized Anxiety Disorder Questionnaire.</p></fn><fn id="table8fn4"><p><sup>d</sup>PHQ-9: Patient Health Questionnaire-9.</p></fn><fn id="table8fn5"><p><sup>e</sup>NOSE: Nasal Obstruction Symptom Evaluation.</p></fn></table-wrap-foot></table-wrap><table-wrap id="t9" position="float"><label>Table 9.</label><caption><p>mPower correlation values.</p></caption><table id="table9" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Reference measure</td><td align="left" valign="bottom">Pearson correlation</td><td align="left" valign="bottom">CFA<sup><xref ref-type="table-fn" rid="table9fn1">a</xref></sup> factor correlation</td></tr></thead><tbody><tr><td align="left" valign="top">UPDRS<sup><xref ref-type="table-fn" rid="table9fn2">b</xref></sup></td><td align="left" valign="top">&#x2212;0.329</td><td align="left" valign="top">&#x2212;0.329</td></tr><tr><td align="left" valign="top">PDQ-8<sup><xref ref-type="table-fn" rid="table9fn3">c</xref></sup></td><td align="left" valign="top">&#x2212;0.299</td><td align="left" valign="top">&#x2212;0.319</td></tr></tbody></table><table-wrap-foot><fn id="table9fn1"><p><sup>a</sup>CFA: Confirmatory Factor Analysis.</p></fn><fn id="table9fn2"><p><sup>b</sup>UPDRS: Movement Disorder Society Unified Parkinson&#x2019;s Disease Rating Scale (selected questions). 
</p></fn><fn id="table9fn3"><p><sup>c</sup>PDQ-8: Parkinson&#x2019;s Disease Questionnaire (shortened version).</p></fn></table-wrap-foot></table-wrap><table-wrap id="t10" position="float"><label>Table 10.</label><caption><p>Brighten correlation values<sup><xref ref-type="table-fn" rid="table10fn1">a</xref></sup></p></caption><table id="table10" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Digital measure</td><td align="left" valign="bottom">Pearson correlation</td><td align="left" valign="bottom">CFA<sup><xref ref-type="table-fn" rid="table10fn2">b</xref></sup> factor correlation</td></tr></thead><tbody><tr><td align="left" valign="top">Unique numbers calls incoming</td><td align="left" valign="top">0.024</td><td align="left" valign="top">0.213</td></tr><tr><td align="left" valign="top">Unique numbers call outgoing</td><td align="left" valign="top">0.175</td><td align="left" valign="top">0.340</td></tr><tr><td align="left" valign="top">Unique numbers texts received</td><td align="left" valign="top">0.037</td><td align="left" valign="top">0.147</td></tr></tbody></table><table-wrap-foot><fn id="table10fn1"><p><sup>a</sup>All statistics use the PHQ-9 reference measure.</p></fn><fn id="table10fn2"><p><sup>b</sup>CFA: Confirmatory Factor Analysis.</p></fn></table-wrap-foot></table-wrap><p>In all scenarios, the CFA factor correlation was at least as large in magnitude as the Pearson correlation; this difference in magnitude was subtle in the Urban Poor dataset (where all relationships were negligible), the STAGES data (between 10% and 15% difference), and the mPower data (where, despite the larger magnitude in relationships, the difference between the two correlation types was of a similar magnitude to that in the Urban Poor data). 
However, the difference in correlation magnitude was much more noticeable in the Brighten dataset; the CFA factor correlation was at least twice as large as the Pearson correlation in every scenario.</p></sec><sec id="s3-3-2"><title>Regressions</title><p>In the Urban Poor, STAGES, and Brighten datasets, the calculated R<sup>2</sup> values (either standard or adjusted; <xref ref-type="table" rid="table11">Tables 11</xref><xref ref-type="table" rid="table12"/>-<xref ref-type="table" rid="table13">13</xref>) were negligible. There was a trend for the R<sup>2</sup> values to be greater in magnitude in the Brighten dataset than in the STAGES dataset, which were in turn generally greater than those exhibited in the Urban Poor dataset.</p><p>In the mPower dataset (<xref ref-type="table" rid="table14">Table 14</xref>), the R<sup>2</sup> values were much larger in magnitude than in the other datasets, although still small in general, with values between 0.123 and 0.139.</p><table-wrap id="t11" position="float"><label>Table 11.</label><caption><p>Urban Poor R<sup>2</sup> values<sup><xref ref-type="table-fn" rid="table11fn1">a</xref></sup></p></caption><table id="table11" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Regression model type</td><td align="left" valign="bottom">Reference measure(s) included in the regression model</td><td align="left" valign="bottom">R<sup>2</sup> (standard or adjusted as appropriate)</td></tr></thead><tbody><tr><td align="left" valign="top" rowspan="3">SLR<sup><xref ref-type="table-fn" rid="table11fn2">b</xref></sup></td><td align="left" valign="top">Rosenberg<sup><xref ref-type="table-fn" rid="table11fn3">c</xref></sup></td><td align="left" valign="top">&#x003C;&#x003C;0.001</td></tr><tr><td align="left" valign="top">GAD-7<sup><xref ref-type="table-fn" rid="table11fn5">e</xref></sup></td><td align="left" valign="top">0.001</td></tr><tr><td align="left" valign="top">PHQ-9<sup><xref ref-type="table-fn" 
rid="table11fn6">f</xref></sup></td><td align="left" valign="top">0.001</td></tr><tr><td align="left" valign="top" rowspan="3">MLR<sup><xref ref-type="table-fn" rid="table11fn4">d</xref></sup></td><td align="left" valign="top">All weekly surveys</td><td align="left" valign="top">&#x2212;0.005</td></tr><tr><td align="left" valign="top">All + daily (mean values)</td><td align="left" valign="top">&#x2212;0.003</td></tr><tr><td align="left" valign="top">All + daily (individual days)</td><td align="left" valign="top">&#x2212;0.005</td></tr></tbody></table><table-wrap-foot><fn id="table11fn1"><p><sup>a</sup>The daily survey is a single-item global impression of happiness.</p></fn><fn id="table11fn2"><p><sup>b</sup>SLR: simple linear regression.</p></fn><fn id="table11fn3"><p><sup>c</sup>Rosenberg: Rosenberg Self-Esteem Scale.</p></fn><fn id="table11fn4"><p><sup>d</sup>MLR: multiple linear regression.</p></fn><fn id="table11fn5"><p><sup>e</sup>GAD-7: Generalized Anxiety Disorder Questionnaire.</p></fn><fn id="table11fn6"><p><sup>f</sup>PHQ-9: Patient Health Questionnaire-9.</p></fn></table-wrap-foot></table-wrap><table-wrap id="t12" position="float"><label>Table 12.</label><caption><p>STAGES R<sup>2</sup> values<sup><xref ref-type="table-fn" rid="table12fn1">a</xref></sup></p></caption><table id="table12" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Regression model type</td><td align="left" valign="bottom">Reference measure(s) included in the regression model</td><td align="left" valign="bottom">R<sup>2</sup> (standard or adjusted as appropriate)</td></tr></thead><tbody><tr><td align="left" valign="top" rowspan="4">SLR<sup><xref ref-type="table-fn" rid="table12fn2">b</xref></sup></td><td align="left" valign="top">FSS<sup><xref ref-type="table-fn" rid="table12fn4">d</xref></sup></td><td align="left" valign="top">0.030</td></tr><tr><td align="left" valign="top">GAD-7<sup><xref ref-type="table-fn" rid="table12fn5">e</xref></sup></td><td 
align="left" valign="top">0.006</td></tr><tr><td align="left" valign="top">PHQ-9<sup><xref ref-type="table-fn" rid="table12fn6">f</xref></sup></td><td align="left" valign="top">0.024</td></tr><tr><td align="left" valign="top">NOSE<sup><xref ref-type="table-fn" rid="table12fn7">g</xref></sup></td><td align="left" valign="top">0.009</td></tr><tr><td align="left" valign="top">MLR<sup><xref ref-type="table-fn" rid="table12fn3">c</xref></sup></td><td align="left" valign="top">All</td><td align="left" valign="top">0.033</td></tr></tbody></table><table-wrap-foot><fn id="table12fn1"><p><sup>a</sup>No daily surveys are included.</p></fn><fn id="table12fn2"><p><sup>b</sup>SLR: simple linear regression.</p></fn><fn id="table12fn3"><p><sup>c</sup>MLR: multiple linear regression.</p></fn><fn id="table12fn4"><p><sup>d</sup>FSS: Fatigue Severity Score.</p></fn><fn id="table12fn5"><p><sup>e</sup>GAD-7: Generalized Anxiety Disorder Questionnaire.</p></fn><fn id="table12fn6"><p><sup>f</sup>PHQ-9: Patient Health Questionnaire-9.</p></fn><fn id="table12fn7"><p><sup>g</sup>NOSE: Nasal Obstruction Symptom Evaluation.</p></fn></table-wrap-foot></table-wrap><table-wrap id="t13" position="float"><label>Table 13.</label><caption><p>Brighten R<sup>2</sup> values<sup><xref ref-type="table-fn" rid="table13fn1">a</xref></sup></p></caption><table id="table13" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom" rowspan="3">Digital variable</td><td align="left" valign="bottom" colspan="4">Regression model type</td></tr><tr><td align="left" valign="bottom" rowspan="2">SLR<sup><xref ref-type="table-fn" rid="table13fn2">b</xref></sup></td><td align="left" valign="bottom" colspan="3">MLR<sup><xref ref-type="table-fn" rid="table13fn3">c</xref></sup></td></tr><tr><td align="left" valign="bottom">Daily 1</td><td align="left" valign="bottom">Daily 2</td><td align="left" valign="bottom">Both dailies</td></tr></thead><tbody><tr><td align="left" valign="top">Unique numbers calls 
incoming</td><td align="left" valign="top">0.039</td><td align="left" valign="top">0.022</td><td align="left" valign="top">0.060</td><td align="left" valign="top">0.053</td></tr><tr><td align="left" valign="top">Unique numbers call outgoing</td><td align="left" valign="top">0.041</td><td align="left" valign="top">0.036</td><td align="left" valign="top">0.057</td><td align="left" valign="top">0.045</td></tr><tr><td align="left" valign="top">Unique numbers texts received</td><td align="left" valign="top">0.001</td><td align="left" valign="top">&#x2212;0.024</td><td align="left" valign="top">&#x2212;0.016</td><td align="left" valign="top">&#x2212;0.029</td></tr></tbody></table><table-wrap-foot><fn id="table13fn1"><p><sup>a</sup>All statistics use the PHQ-9 multiday recall reference measure. The two daily reference measures are the two individual questions isolated from the PHQ-2 (Patient Health Questionnaire-2), which assesses depression severity and was adapted to become a daily measure in this study.</p></fn><fn id="table13fn2"><p><sup>b</sup>SLR: simple linear regression.</p></fn><fn id="table13fn3"><p><sup>c</sup>MLR: multiple linear regression.</p></fn></table-wrap-foot></table-wrap><table-wrap id="t14" position="float"><label>Table 14.</label><caption><p>mPower R<sup>2</sup> values<sup><xref ref-type="table-fn" rid="table14fn1">a</xref></sup></p></caption><table id="table14" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Regression model type</td><td align="left" valign="bottom">Reference measure(s) included in the regression model</td><td align="left" valign="bottom">R<sup>2</sup> (standard or adjusted as appropriate)</td></tr></thead><tbody><tr><td align="left" valign="top" rowspan="2">SLR<sup><xref ref-type="table-fn" rid="table14fn2">b</xref></sup></td><td align="left" valign="top">UPDRS<sup><xref ref-type="table-fn" rid="table14fn3">c</xref></sup></td><td align="left" valign="top">0.131</td></tr><tr><td align="left" 
valign="top">PDQ-8<sup><xref ref-type="table-fn" rid="table14fn4">d</xref></sup></td><td align="left" valign="top">0.123</td></tr><tr><td align="left" valign="top">MLR<sup><xref ref-type="table-fn" rid="table14fn5">e</xref></sup></td><td align="left" valign="top">All</td><td align="left" valign="top">0.139</td></tr></tbody></table><table-wrap-foot><fn id="table14fn1"><p><sup>a</sup>No daily surveys are included.</p></fn><fn id="table14fn2"><p><sup>b</sup>SLR: simple linear regression.</p></fn><fn id="table14fn3"><p><sup>c</sup>UPDRS: Movement Disorder Society Unified Parkinson&#x2019;s Disease Rating Scale (selected questions).</p></fn><fn id="table14fn4"><p><sup>d</sup>PDQ-8: Parkinson&#x2019;s Disease Questionnaire (shortened version).</p></fn><fn id="table14fn5"><p><sup>e</sup>MLR: multiple linear regression.</p></fn></table-wrap-foot></table-wrap><p>In each dataset with a daily RM available (Urban Poor and Brighten), it was generally true that including daily RM data resulted in a stronger adjusted R<sup>2</sup> than when not including it. In datasets without a daily RM (STAGES and mPower), using multiple RMs generally resulted in a stronger R<sup>2</sup> than when using a single RM.</p></sec></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Principal Findings</title><p>In this work, we assessed the feasibility of selected statistical methodology to estimate relationships between digital measures and COA RMs. We also investigated how properties of an AV study&#x2019;s design may affect the strength of the estimated relationships by using several statistical methodologies. 
We accomplished this by using real-world data, captured using sensor-based digital health technologies, to conduct hypothetical AV studies across a range of scenarios.</p><p>Our analysis of the 4 real-world datasets demonstrated that the CFA models were able to estimate a factor correlation in each case and that these correlations were greater than or equal to the corresponding Pearson correlation in magnitude. This finding is consistent with the prior simulation study [<xref ref-type="bibr" rid="ref9">9</xref>] and with established knowledge of how CFA models function. Specifically, because CFA methods assess the latent correlation between measures, and the correlation between latent variables is not attenuated by measurement error unlike PCCs [<xref ref-type="bibr" rid="ref31">31</xref>-<xref ref-type="bibr" rid="ref33">33</xref>], our results support the use of CFA to assess the relationship between a novel digital measure and a COA RM. The use of CFA in conjunction with PCCs facilitates a better understanding of the relationship that exists between the DM and the RM. CFA uses all available RM information in the analysis (ie, item-level data), versus PCCs and/or regression models alone, which aggregate the item-level RM data into total scores or mean values. Using multiple methods can lead to a range of estimates which can be used to support a validity argument.</p><p>However, the use of CFA comes with limitations. For example, CFA is known to require a larger sample size to produce stable estimates, and a number of necessary or sufficient conditions exist for the model to be identified, including requiring a minimum of 3 variables per factor (which implies that any COA RM used must comprise at least 3 items) [<xref ref-type="bibr" rid="ref31">31</xref>,<xref ref-type="bibr" rid="ref34">34</xref>,<xref ref-type="bibr" rid="ref35">35</xref>]. 
While it is difficult to determine a uniformly applicable minimum sample size, the consensus is that a sample of participants in at least the hundreds is desirable [<xref ref-type="bibr" rid="ref36">36</xref>]&#x2014;a threshold that many AV studies for digital measures to date have not met [<xref ref-type="bibr" rid="ref37">37</xref>-<xref ref-type="bibr" rid="ref39">39</xref>]. With the improving feasibility and necessity of conducting observational research in the out-of-laboratory environment, larger sample sizes are increasingly accessible. Such research is likely to use COA-based RMs, making the CFA approach particularly relevant.</p><p>A range of relationship values was exhibited, which indicates both successful and unsuccessful model fits across the 4 real-world datasets. The performance of the measures shown in this work supports the feasibility of the selected statistical methods when implemented in real-world data, as their implementation here was successful despite the estimated values being weak. Importantly, the datasets used represented sDHTs from multiple domains, including smartphones/communication and actigraphy data, supporting the applicability of these methods across domains. It is possible that additional digital measurement approaches (such as speech, wearable electroencephalography, etc) may also be well-suited to leveraging the learnings of this work.</p><p>Reasons that weak relationships are observed may include the following: the study design is not optimized for the measure of interest, the chosen RMs are limited in their assessment of the underlying construct measured by the DM in a particular use environment, or a relationship simply may not exist. 
Notably, previous studies that have explored relationships between sDHTs (eg, step counts from wearables) and RMs such as the PHQ-9 have demonstrated low correlation magnitudes (eg, &#x003C;|0.2|), suggesting that strong relationships may not necessarily be expected [<xref ref-type="bibr" rid="ref40">40</xref>,<xref ref-type="bibr" rid="ref41">41</xref>].</p><p>In the work conducted here, the datasets come from studies where the primary focus was not AV evidence generation. It is likely that this affected the estimation of relationships as the principles outlined in <xref ref-type="other" rid="box1">Textbox 1</xref> were violated by each dataset in varying amounts.</p></sec><sec id="s4-2"><title>Recommendations</title><p>We recommend that investigators seek a high level of temporal coherence between the measures chosen for their AV study of a novel digital measure. Good temporal coherence means that the sDHT data used in the AV analyses aligns with the recall period of the COA-based RM. Poorer temporal coherence between measures may decrease the values estimated with agreement statistics because each individual&#x2019;s level on the latent trait assessed by the measures (eg, health, disease severity, physical ability) may have changed over time. This is supported by the Brighten and mPower data, which have moderate to strong temporal coherence and the strongest relationships between measures.</p><p>In addition, we recommend that investigators seek a high level of construct coherence. Construct coherence assures that the DM and the RM are assessing as similar a concept as possible. Poor construct coherence is likely to lead to weak relationships between measures, even when using appropriate statistical methods. 
This is supported by the mPower data, which has the clearest and strongest construct coherence between measures and exhibited the strongest relationships between the measures.</p><p>We emphasize the need to determine the extent of missing data and reduce measurement error in both the DM and RMs whenever possible. Missing data particularly affects regression models, where incomplete cases will lead to entire participants&#x2019; data being excluded, thus reducing the sample size. This is supported by the mPower data, which retained its large sample size during analysis due to the data completeness of the RM. The R<sup>2</sup> values in this dataset were two to five times stronger in general than in the Brighten study, which had substantial missing RM data in a smaller starting sample.</p><p>In line with the above methodological considerations, we encourage investigators to carefully plan their AV studies to avoid making incorrect inferences from their results. As always, an argument for validity should be constructed and presented to all stakeholders for advice, including regulators.</p><p>Finally, we recommend that investigators review the assumptions and requirements of the statistical methods they plan to use in the AV study to understand how assumption violations may distort their results and whether such violations are likely to occur. 
For example, while Pearson correlation is known to be relatively robust in terms of violations of parametric assumptions [<xref ref-type="bibr" rid="ref42">42</xref>], CFA can be affected by moderate violations of its model assumptions [<xref ref-type="bibr" rid="ref43">43</xref>,<xref ref-type="bibr" rid="ref44">44</xref>], which can then affect fit index estimation, particularly in the case of the RMSEA model fit index [<xref ref-type="bibr" rid="ref45">45</xref>].</p></sec><sec id="s4-3"><title>COA-Specific Recommendations</title><p>If an investigator is using COA-based RMs in their study, then we recommend longitudinal data collection, including using at least 1 RM with a daily recall period. Using a daily recall RM when the digital measure collects daily summary data is particularly recommended due to the expected strong temporal coherence between the measures.</p><p>When using RMs with multiday recall periods, researchers should collect digital measure data on each day that the recall period pertains to and have a strong, enactable strategy to minimize missing data in this period (such as calling patients the day before the beginning of the wear period to remind them to use the sDHT). These good practices can ensure the best opportunity for temporal coherence.</p><p>In addition, we recommend seeking construct coherence at the item level of the RMs. COA-based RMs are often derived from multidimensional clinical scales [<xref ref-type="bibr" rid="ref46">46</xref>,<xref ref-type="bibr" rid="ref47">47</xref>], which means that items or domains of a COA may have varying construct coherence with the DM. It may be appropriate to select specific items or domains that tightly reflect the latent construct under examination to use as an RM. 
This may lead to a stronger relationship between measures than a simple aggregation of all items or domains.</p><p><xref ref-type="table" rid="table15">Table 15</xref> summarizes all the above recommendations and provides practical directions to aid in appropriate study design for AV of novel digital measures.</p><table-wrap id="t15" position="float"><label>Table 15.</label><caption><p>Considerations for designing a strong AV study for a novel digital measure.</p></caption><table id="table15" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Category</td><td align="left" valign="bottom">Considerations</td></tr></thead><tbody><tr><td align="left" valign="top" colspan="2">Digital measure data collection</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Number of days</td><td align="left" valign="top">Longitudinal collection on consecutive days allows for the use of CFA methods, as long as at least 3 days are collected. Have an enactable participant engagement strategy to minimize missing data.</td></tr><tr><td align="left" valign="top" colspan="2">Study design</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Rigor and quality of RMs</td><td align="left" valign="top">High-quality and high-rigor RMs enable the possibility for the strongest claims about the DM (see Bakker et al [<xref ref-type="bibr" rid="ref5">5</xref>] for a potential hierarchy of RM quality and rigor).</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Objectivity of RMs</td><td align="left" valign="top">Standardized data collection in an RM improves accuracy by reducing measurement error. 
Standardized data processing and standardized and trained interpretation reduce ambiguity and avoid issues with inter-rater variability.</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>RM construct coherence</td><td align="left" valign="top">Good construct coherence between measures may strengthen the values estimated from agreement statistics. Poor construct coherence may cause issues, even if the methods are well suited to assessing agreement. Consider the effect of construct coherence at the item and instrument level if using a COA RM.</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>RM temporal coherence</td><td align="left" valign="top">Good temporal coherence aligns data capture, meaning the measures assess a subject over the same period. Poor temporal coherence may decrease the values estimated with agreement statistics because the measures assess the construct at different times and the level of the construct is subject to change. 
If using a COA RM,<list list-type="bullet"><list-item><p>Consider the benefit of using a daily recall period and assessing on the same days as the digital measure, if, for example, the digital measure collects daily summary count data.</p></list-item></list><list list-type="bullet"><list-item><p>If using a multiday recall period COA, then applying the RM at the end of the period of digital measure data collection and collecting digital measure data on each day of the recall period are expected to increase temporal coherence.</p></list-item></list></td></tr><tr><td align="left" valign="top" rowspan="5"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Miscellaneous</td><td align="left" valign="top">To minimize distortion of results, review the assumptions and requirements of the statistical methods used and avoid violations of assumptions where possible.</td></tr><tr><td align="left" valign="top">Identify factors that may influence missing information and measurement error in data capture and seek to minimize these where possible.</td></tr><tr><td align="left" valign="top">Qualitatively assess the limitations of the study design ahead of conducting it and accept that the threshold for good agreement between measures may be smaller when well-established and rigorous RMs are not available.</td></tr><tr><td align="left" valign="top">Consider more extensive clinical validation and validity testing by assessing repeatability, reliability, and ability to detect change over time when it appears the AV study will not allow you to establish rigorous validation claims. All claims must be validated and verified and backed up with sufficient evidence (subject to regulatory review).</td></tr><tr><td align="left" valign="top">The quality of an RM affects what claims can be made about the performance of the DM. 
Perfect agreement between measures may not be enough for the validation of a novel DM, when the measure is hoped to outperform the RM and available RMs are poor.</td></tr><tr><td align="left" valign="top" colspan="2">Statistical methods for assessing agreement</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>CFA</td><td align="left" valign="top">CFA can account for measurement error and variance at the item level when working with COA RMs since it can assess the latent correlation between the measures, and correlation between latent variables is not attenuated by measurement error.</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Pearson correlation</td><td align="left" valign="top">Pearson correlation is stable, easier to compute, and relatively robust in terms of violations of parametric assumptions. Pearson correlation is known to underestimate the true correlation between measures because of attenuation by measurement error.</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Linear regression</td><td align="left" valign="top">If multiple RMs are being used in the study, then MLR may provide a route to a stronger assessment of agreement between measures than individual SLR, particularly if one has an RM that captures daily data.</td></tr><tr><td align="left" valign="top">Sample size</td><td align="left" valign="top">The statistical methods used in an AV study affect the appropriate minimum sample size. 
Methods such as CFA often require a large sample, which could be fulfilled by repeated measures from each participant.</td></tr></tbody></table></table-wrap></sec><sec id="s4-4"><title>Conclusions</title><p>This study demonstrated the feasibility of applying the analytical methodologies that were evaluated in our previous simulation study [<xref ref-type="bibr" rid="ref9">9</xref>] to a series of real-world datasets. Furthermore, we demonstrated that the performance of different statistical tools (eg, CFA vs PCC) when applied to real data largely recapitulated the trends seen in previous simulated data [<xref ref-type="bibr" rid="ref9">9</xref>]. Additionally, characteristics of the analyzed datasets, such as sample size, temporal coherence, and missing information patterns, had impacts on analysis that motivated our recommendations for specific design considerations in AV studies.</p><p>By using a standardized methodology for evaluating novel digital measures, developers, biostatisticians, and clinical researchers will be able to navigate the complex validation landscape more easily, with more certainty, and with more tools at their disposal when undertaking an analytical validity study.</p><p>Adopting standardized practices for the conduct of analytical validation studies creates a common approach that improves understanding and expedites the pathway to validation and regulatory review. 
This may, in turn, provide indirect cost savings in clinical trials by enabling a more rigorous development of sDHT-based technologies, which themselves offer considerable direct reductions in costs associated with recruitment, retention, and follow-up [<xref ref-type="bibr" rid="ref48">48</xref>].</p></sec></sec></body><back><ack><p>The authors gratefully acknowledge the contributions of the following experts through participation in the statistical advisory committee, advice on dataset acquisition, and asynchronous review of the results: Chakib Battoui, Jakob Bj&#x00F8;rner, Yiorgos Christakis, Valentin Hamy, Andrew Potter, Bohdana Ratitch, David Reasner, Colleen Russell, Sachin Shah, Berend Terluin, Andrew Trigg, Kevin Weinfurt, and Robert Wright.</p><p>In addition, the authors gratefully acknowledge the contributions of DiMe members for their support: Sarah Averill Lott, Samantha McClenahan, Bethanie McCrary, Nicole Medina, Danielle Stefko, and Benjamin Vandendriessche.</p></ack><notes><sec><title>Data Availability</title><p>For the Urban Poor dataset research, the National Sleep Research Resource was supported by the U.S. National Institutes of Health, National Heart Lung and Blood Institute (R24 HL114473, 75N92019R002). The STAGES dataset research was conducted using the STAGES - Stanford Technology, Analytics and Genomics in Sleep Resource funded by the Klarman Family Foundation. The investigators of the STAGES study contributed to the design and implementation of the STAGES cohort and/or provided data and/or collected biospecimens but did not necessarily participate in the analysis or writing of this report. The full list of STAGES investigators can be found at the project website.</p><p>The National Sleep Research Resource was supported by the U.S. National Institutes of Health, National Heart Lung and Blood Institute (R24 HL114473, 75N92019R002). 
The mPower dataset was contributed by users of the Parkinson mPower mobile application as part of the mPower study developed by Sage Bionetworks [<xref ref-type="bibr" rid="ref49">49</xref>]. The Brighten Dataset was contributed by participants in the Brighten study [<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref50">50</xref>].</p></sec></notes><fn-group><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">AV</term><def><p>analytical validation</p></def></def-item><def-item><term id="abb2">CFA</term><def><p>confirmatory factor analysis</p></def></def-item><def-item><term id="abb3">COAs</term><def><p>clinical outcome assessments</p></def></def-item><def-item><term id="abb4">DM</term><def><p>digital measure</p></def></def-item><def-item><term id="abb5">IRB</term><def><p>institutional review board</p></def></def-item><def-item><term id="abb6">MLR</term><def><p>multiple linear regression</p></def></def-item><def-item><term id="abb7">RM</term><def><p>reference measure</p></def></def-item><def-item><term id="abb8">sDHT</term><def><p>Sensor-based digital health technology</p></def></def-item><def-item><term id="abb9">SLR</term><def><p>simple linear regression</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>DiMasi</surname><given-names>JA</given-names> </name><name name-style="western"><surname>Dirks</surname><given-names>A</given-names> </name><name name-style="western"><surname>Smith</surname><given-names>Z</given-names> </name><etal/></person-group><article-title>Assessing the net financial benefits of employing digital endpoints in clinical trials</article-title><source>Clin Transl Sci</source><year>2024</year><month>08</month><volume>17</volume><issue>8</issue><fpage>e13902</fpage><pub-id 
pub-id-type="doi">10.1111/cts.13902</pub-id><pub-id pub-id-type="medline">39072949</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="web"><article-title>European Medicines Agency</article-title><source>Qualification opinion for stride velocity 95th centile as primary endpoint in studies in ambulatory Duchenne muscular dystrophy studies</source><year>2023</year><month>02</month><day>20</day><access-date>2024-12-19</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://tinyurl.com/hshp3pn3">https://tinyurl.com/hshp3pn3</ext-link></comment></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Brognara</surname><given-names>L</given-names> </name><name name-style="western"><surname>Palumbo</surname><given-names>P</given-names> </name><name name-style="western"><surname>Grimm</surname><given-names>B</given-names> </name><name name-style="western"><surname>Palmerini</surname><given-names>L</given-names> </name></person-group><article-title>Assessing gait in Parkinson&#x2019;s disease using wearable motion sensors: a systematic review</article-title><source>Diseases</source><year>2019</year><month>02</month><day>5</day><volume>7</volume><issue>1</issue><fpage>18</fpage><pub-id pub-id-type="doi">10.3390/diseases7010018</pub-id><pub-id pub-id-type="medline">30764502</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Goldsack</surname><given-names>JC</given-names> </name><name name-style="western"><surname>Coravos</surname><given-names>A</given-names> </name><name name-style="western"><surname>Bakker</surname><given-names>JP</given-names> </name><etal/></person-group><article-title>Verification, analytical validation, and clinical validation (V3): the foundation of determining 
fit-for-purpose for Biometric Monitoring Technologies (BioMeTs)</article-title><source>NPJ Digit Med</source><year>2020</year><volume>3</volume><fpage>55</fpage><pub-id pub-id-type="doi">10.1038/s41746-020-0260-4</pub-id><pub-id pub-id-type="medline">32337371</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="web"><person-group person-group-type="author"><name name-style="western"><surname>Bakker</surname><given-names>JP</given-names> </name><name name-style="western"><surname>Barge</surname><given-names>R</given-names> </name><name name-style="western"><surname>Centra</surname><given-names>J</given-names> </name><etal/></person-group><article-title>Digital Medicine Society</article-title><source>V3+: An extension to the V3 framework to ensure user-centricity and scalability of sensor-based digital health technologies</source><year>2024</year><access-date>2024-12-19</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://datacc.dimesociety.org/resources/v3-an-extension-to-the-v3-framework-to-ensure-user-centricity-and-scalability-of-sensor-based-digital-health-technologies/">https://datacc.dimesociety.org/resources/v3-an-extension-to-the-v3-framework-to-ensure-user-centricity-and-scalability-of-sensor-based-digital-health-technologies/</ext-link></comment></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ratitch</surname><given-names>B</given-names> </name><name name-style="western"><surname>Trigg</surname><given-names>A</given-names> </name><name name-style="western"><surname>Majumder</surname><given-names>M</given-names> </name><name name-style="western"><surname>Vlajnic</surname><given-names>V</given-names> </name><name name-style="western"><surname>Rethemeier</surname><given-names>N</given-names> </name><name name-style="western"><surname>Nkulikiyinka</surname><given-names>R</given-names> 
</name></person-group><article-title>Clinical validation of novel digital measures: statistical methods for reliability evaluation</article-title><source>Digit Biomark</source><year>2023</year><volume>7</volume><issue>1</issue><fpage>74</fpage><lpage>91</lpage><pub-id pub-id-type="doi">10.1159/000531054</pub-id><pub-id pub-id-type="medline">37588480</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Rowe</surname><given-names>HP</given-names> </name><name name-style="western"><surname>Stipancic</surname><given-names>KL</given-names> </name><name name-style="western"><surname>Lammert</surname><given-names>AC</given-names> </name><name name-style="western"><surname>Green</surname><given-names>JR</given-names> </name></person-group><article-title>Validation of an acoustic-based framework of speech motor control: assessing criterion and construct validity using kinematic and perceptual measures</article-title><source>J Speech Lang Hear Res</source><year>2021</year><month>12</month><day>13</day><volume>64</volume><issue>12</issue><fpage>4736</fpage><lpage>4753</lpage><pub-id pub-id-type="doi">10.1044/2021_JSLHR-21-00201</pub-id><pub-id pub-id-type="medline">34735295</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Tr&#x00F6;ger</surname><given-names>J</given-names> </name><name name-style="western"><surname>Baykara</surname><given-names>E</given-names> </name><name name-style="western"><surname>Zhao</surname><given-names>J</given-names> </name><etal/></person-group><article-title>Validation of the remote automated ki:e speech Biomarker for cognition in mild cognitive impairment: verification and validation following DiME V3 framework</article-title><source>Digit 
Biomark</source><year>2022</year><volume>6</volume><issue>3</issue><fpage>107</fpage><lpage>116</lpage><pub-id pub-id-type="doi">10.1159/000526471</pub-id><pub-id pub-id-type="medline">36466952</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Turner</surname><given-names>S</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>C</given-names> </name><name name-style="western"><surname>Acosta</surname><given-names>R</given-names> </name><etal/></person-group><article-title>Methods for analytical validation of novel digital clinical measures: a simulation study</article-title><source>Health Informatics</source><comment>Preprint posted online on 2024</comment><pub-id pub-id-type="doi">10.1101/2024.11.29.24318211</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zhang</surname><given-names>GQ</given-names> </name><name name-style="western"><surname>Cui</surname><given-names>L</given-names> </name><name name-style="western"><surname>Mueller</surname><given-names>R</given-names> </name><etal/></person-group><article-title>The National Sleep Research Resource: towards a sleep data commons</article-title><source>J Am Med Inform Assoc</source><year>2018</year><month>10</month><day>1</day><volume>25</volume><issue>10</issue><fpage>1351</fpage><lpage>1358</lpage><pub-id pub-id-type="doi">10.1093/jamia/ocy064</pub-id><pub-id pub-id-type="medline">29860441</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bessone</surname><given-names>P</given-names> </name><name name-style="western"><surname>Rao</surname><given-names>G</given-names> </name><name 
name-style="western"><surname>Schilbach</surname><given-names>F</given-names> </name><name name-style="western"><surname>Schofield</surname><given-names>H</given-names> </name><name name-style="western"><surname>Toma</surname><given-names>M</given-names> </name></person-group><article-title>The economic consequences of increasing sleep among the urban poor</article-title><source>Q J Econ</source><year>2021</year><month>08</month><volume>136</volume><issue>3</issue><fpage>1887</fpage><lpage>1941</lpage><pub-id pub-id-type="doi">10.1093/qje/qjab013</pub-id><pub-id pub-id-type="medline">34220361</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bot</surname><given-names>BM</given-names> </name><name name-style="western"><surname>Suver</surname><given-names>C</given-names> </name><name name-style="western"><surname>Neto</surname><given-names>EC</given-names> </name><etal/></person-group><article-title>The mPower study, Parkinson disease mobile data collected using ResearchKit</article-title><source>Sci Data</source><year>2016</year><month>03</month><day>3</day><volume>3</volume><issue>1</issue><fpage>160011</fpage><pub-id pub-id-type="doi">10.1038/sdata.2016.11</pub-id><pub-id pub-id-type="medline">26938265</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Arean</surname><given-names>PA</given-names> </name><name name-style="western"><surname>Hallgren</surname><given-names>KA</given-names> </name><name name-style="western"><surname>Jordan</surname><given-names>JT</given-names> </name><etal/></person-group><article-title>The use and effectiveness of mobile apps for depression: results from a fully remote clinical trial</article-title><source>J Med Internet 
Res</source><year>2016</year><month>12</month><day>20</day><volume>18</volume><issue>12</issue><fpage>e330</fpage><pub-id pub-id-type="doi">10.2196/jmir.6482</pub-id><pub-id pub-id-type="medline">27998876</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Rhatigan</surname><given-names>K</given-names> </name><name name-style="western"><surname>Hirons</surname><given-names>B</given-names> </name><name name-style="western"><surname>Kesavan</surname><given-names>H</given-names> </name><etal/></person-group><article-title>Patient global impression of severity scale in chronic cough: validation and formulation of symptom severity categories</article-title><source>J Allergy Clin Immunol Pract</source><year>2023</year><month>12</month><volume>11</volume><issue>12</issue><fpage>3706</fpage><lpage>3712</lpage><pub-id pub-id-type="doi">10.1016/j.jaip.2023.08.046</pub-id><pub-id pub-id-type="medline">37678666</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kroenke</surname><given-names>K</given-names> </name><name name-style="western"><surname>Spitzer</surname><given-names>RL</given-names> </name><name name-style="western"><surname>Williams</surname><given-names>JB</given-names> </name></person-group><article-title>The PHQ-9: validity of a brief depression severity measure</article-title><source>J Gen Intern Med</source><year>2001</year><month>09</month><volume>16</volume><issue>9</issue><fpage>606</fpage><lpage>613</lpage><pub-id pub-id-type="doi">10.1046/j.1525-1497.2001.016009606.x</pub-id><pub-id pub-id-type="medline">11556941</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name 
name-style="western"><surname>Rosenberg</surname><given-names>M</given-names> </name></person-group><source>Rosenberg Self-Esteem Scale</source><year>1965</year><publisher-name>APA PsycTests</publisher-name><pub-id pub-id-type="doi">10.1037/t01038-000</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Spitzer</surname><given-names>RL</given-names> </name><name name-style="western"><surname>Kroenke</surname><given-names>K</given-names> </name><name name-style="western"><surname>Williams</surname><given-names>JBW</given-names> </name><name name-style="western"><surname>L&#x00F6;we</surname><given-names>B</given-names> </name></person-group><article-title>A brief measure for assessing generalized anxiety disorder: the GAD-7</article-title><source>Arch Intern Med</source><year>2006</year><month>05</month><day>22</day><volume>166</volume><issue>10</issue><fpage>1092</fpage><lpage>1097</lpage><pub-id pub-id-type="doi">10.1001/archinte.166.10.1092</pub-id><pub-id pub-id-type="medline">16717171</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Krupp</surname><given-names>LB</given-names> </name><name name-style="western"><surname>LaRocca</surname><given-names>NG</given-names> </name><name name-style="western"><surname>Muir-Nash</surname><given-names>J</given-names> </name><name name-style="western"><surname>Steinberg</surname><given-names>AD</given-names> </name></person-group><article-title>The fatigue severity scale. 
Application to patients with multiple sclerosis and systemic lupus erythematosus</article-title><source>Arch Neurol</source><year>1989</year><month>10</month><volume>46</volume><issue>10</issue><fpage>1121</fpage><lpage>1123</lpage><pub-id pub-id-type="doi">10.1001/archneur.1989.00520460115022</pub-id><pub-id pub-id-type="medline">2803071</pub-id></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Stewart</surname><given-names>MG</given-names> </name><name name-style="western"><surname>Witsell</surname><given-names>DL</given-names> </name><name name-style="western"><surname>Smith</surname><given-names>TL</given-names> </name><name name-style="western"><surname>Weaver</surname><given-names>EM</given-names> </name><name name-style="western"><surname>Yueh</surname><given-names>B</given-names> </name><name name-style="western"><surname>Hannley</surname><given-names>MT</given-names> </name></person-group><article-title>Development and validation of the Nasal Obstruction Symptom Evaluation (NOSE) scale</article-title><source>Otolaryngol Head Neck Surg</source><year>2004</year><month>02</month><volume>130</volume><issue>2</issue><fpage>157</fpage><lpage>163</lpage><pub-id pub-id-type="doi">10.1016/j.otohns.2003.09.016</pub-id><pub-id pub-id-type="medline">14990910</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Fahn</surname><given-names>S</given-names> </name><name name-style="western"><surname>Elton</surname><given-names>RL</given-names> </name></person-group><person-group person-group-type="editor"><name name-style="western"><surname>Fahn</surname><given-names>S</given-names> </name><name name-style="western"><surname>Marsden</surname><given-names>CD</given-names> </name><name 
name-style="western"><surname>Calne</surname><given-names>DB</given-names> </name><name name-style="western"><surname>Goldstein</surname><given-names>M</given-names> </name></person-group><article-title>Unified Parkinson&#x2019;s disease rating scale</article-title><source>Recent Developments in Parkinson&#x2019;s Disease</source><year>1987</year><access-date>2025-10-29</access-date><volume>2</volume><publisher-name>Macmillan Health Care Information</publisher-name><fpage>153</fpage><lpage>164</lpage><comment><ext-link ext-link-type="uri" xlink:href="https://www.movementdisorders.org/MDS-Files1/PDFs/Task-Force-Papers/unified.pdf">https://www.movementdisorders.org/MDS-Files1/PDFs/Task-Force-Papers/unified.pdf</ext-link></comment></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Jenkinson</surname><given-names>C</given-names> </name><name name-style="western"><surname>Fitzpatrick</surname><given-names>R</given-names> </name><name name-style="western"><surname>Peto</surname><given-names>V</given-names> </name><name name-style="western"><surname>Greenhall</surname><given-names>R</given-names> </name><name name-style="western"><surname>Hyman</surname><given-names>N</given-names> </name></person-group><article-title>The PDQ-8: development and validation of a short-form Parkinson&#x2019;s disease questionnaire</article-title><source>Psychol Health</source><year>1997</year><month>12</month><volume>12</volume><issue>6</issue><fpage>805</fpage><lpage>814</lpage><pub-id pub-id-type="doi">10.1080/08870449708406741</pub-id></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kroenke</surname><given-names>K</given-names> </name><name name-style="western"><surname>Spitzer</surname><given-names>RL</given-names> </name><name 
name-style="western"><surname>Williams</surname><given-names>JBW</given-names> </name></person-group><article-title>The Patient Health Questionnaire-2: validity of a two-item depression screener</article-title><source>Med Care</source><year>2003</year><month>11</month><volume>41</volume><issue>11</issue><fpage>1284</fpage><lpage>1292</lpage><pub-id pub-id-type="doi">10.1097/01.MLR.0000093487.78664.3C</pub-id><pub-id pub-id-type="medline">14583691</pub-id></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Yao</surname><given-names>J</given-names> </name><name name-style="western"><surname>Tan</surname><given-names>CS</given-names> </name><name name-style="western"><surname>Lim</surname><given-names>N</given-names> </name><name name-style="western"><surname>Tan</surname><given-names>J</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>C</given-names> </name><name name-style="western"><surname>M&#x00FC;ller-Riemenschneider</surname><given-names>F</given-names> </name></person-group><article-title>Number of daily measurements needed to estimate habitual step count levels using wrist-worn trackers and smartphones in 212,048 adults</article-title><source>Sci Rep</source><year>2021</year><month>05</month><day>5</day><volume>11</volume><issue>1</issue><fpage>9633</fpage><pub-id pub-id-type="doi">10.1038/s41598-021-89141-3</pub-id><pub-id pub-id-type="medline">33953288</pub-id></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hart</surname><given-names>TL</given-names> </name><name name-style="western"><surname>Swartz</surname><given-names>AM</given-names> </name><name name-style="western"><surname>Cashin</surname><given-names>SE</given-names> </name><name 
name-style="western"><surname>Strath</surname><given-names>SJ</given-names> </name></person-group><article-title>How many days of monitoring predict physical activity and sedentary behaviour in older adults?</article-title><source>Int J Behav Nutr Phys Act</source><year>2011</year><month>06</month><day>16</day><volume>8</volume><issue>1</issue><fpage>62</fpage><pub-id pub-id-type="doi">10.1186/1479-5868-8-62</pub-id><pub-id pub-id-type="medline">21679426</pub-id></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Dillon</surname><given-names>CB</given-names> </name><name name-style="western"><surname>Fitzgerald</surname><given-names>AP</given-names> </name><name name-style="western"><surname>Kearney</surname><given-names>PM</given-names> </name><etal/></person-group><article-title>Number of days required to estimate habitual activity using wrist-worn GENEActiv accelerometer: a cross-sectional study</article-title><source>PLoS ONE</source><year>2016</year><volume>11</volume><issue>5</issue><fpage>e0109913</fpage><pub-id pub-id-type="doi">10.1371/journal.pone.0109913</pub-id><pub-id pub-id-type="medline">27149674</pub-id></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Muth&#x00E9;n</surname><given-names>B</given-names> </name></person-group><article-title>A general structural equation model with dichotomous, ordered categorical, and continuous latent variable indicators</article-title><source>Psychometrika</source><year>1984</year><month>03</month><volume>49</volume><issue>1</issue><fpage>115</fpage><lpage>132</lpage><pub-id pub-id-type="doi">10.1007/BF02294210</pub-id></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Flora</surname><given-names>DB</given-names> </name><name name-style="western"><surname>Curran</surname><given-names>PJ</given-names> </name></person-group><article-title>An empirical evaluation of alternative methods of estimation for confirmatory factor analysis with ordinal data</article-title><source>Psychol Methods</source><year>2004</year><month>12</month><volume>9</volume><issue>4</issue><fpage>466</fpage><lpage>491</lpage><pub-id pub-id-type="doi">10.1037/1082-989X.9.4.466</pub-id><pub-id pub-id-type="medline">15598100</pub-id></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hu</surname><given-names>LT</given-names> </name><name name-style="western"><surname>Bentler</surname><given-names>PM</given-names> </name></person-group><article-title>Cutoff criteria for fit indexes in covariance structure analysis: conventional criteria versus new alternatives</article-title><source>Struct Equ Modeling</source><year>1999</year><month>01</month><volume>6</volume><issue>1</issue><fpage>1</fpage><lpage>55</lpage><pub-id pub-id-type="doi">10.1080/10705519909540118</pub-id></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Kline</surname><given-names>RB</given-names> </name></person-group><source>Principles and Practice of Structural Equation Modeling</source><year>2023</year><edition>5</edition><publisher-name>Guilford Press</publisher-name><pub-id pub-id-type="other">ISBN-10 1462551912</pub-id></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="web"><person-group person-group-type="author"><collab>R Core Team</collab></person-group><article-title>R: a language and environment for statistical computing</article-title><source>R Foundation for Statistical 
Computing</source><year>2024</year><access-date>2024-12-19</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.r-project.org/">https://www.r-project.org/</ext-link></comment></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Comrey</surname><given-names>AL</given-names> </name><name name-style="western"><surname>Lee</surname><given-names>HB</given-names> </name></person-group><source>A First Course in Factor Analysis</source><year>2013</year><edition>2</edition><publisher-name>Psychology Press</publisher-name><pub-id pub-id-type="doi">10.4324/9781315827506</pub-id></nlm-citation></ref><ref id="ref32"><label>32</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Mishra</surname><given-names>M</given-names> </name></person-group><article-title>Confirmatory factor analysis (CFA) as an analytical technique to assess measurement error in survey research</article-title><source>Paradigm: A Management Research Journal</source><year>2016</year><month>12</month><volume>20</volume><issue>2</issue><fpage>97</fpage><lpage>112</lpage><pub-id pub-id-type="doi">10.1177/0971890716672933</pub-id></nlm-citation></ref><ref id="ref33"><label>33</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Humphreys</surname><given-names>RK</given-names> </name><name name-style="western"><surname>Puth</surname><given-names>MT</given-names> </name><name name-style="western"><surname>Neuh&#x00E4;user</surname><given-names>M</given-names> </name><name name-style="western"><surname>Ruxton</surname><given-names>GD</given-names> </name></person-group><article-title>Underestimation of Pearson&#x2019;s product moment correlation 
statistic</article-title><source>Oecologia</source><year>2019</year><month>01</month><volume>189</volume><issue>1</issue><fpage>1</fpage><lpage>7</lpage><pub-id pub-id-type="doi">10.1007/s00442-018-4233-0</pub-id><pub-id pub-id-type="medline">30062565</pub-id></nlm-citation></ref><ref id="ref34"><label>34</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Kline</surname><given-names>P</given-names> </name></person-group><source>An Easy Guide to Factor Analysis</source><year>2014</year><edition>1</edition><publisher-name>Routledge</publisher-name><pub-id pub-id-type="doi">10.4324/9781315788135</pub-id></nlm-citation></ref><ref id="ref35"><label>35</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Velicer</surname><given-names>WF</given-names> </name><name name-style="western"><surname>Fava</surname><given-names>JL</given-names> </name></person-group><article-title>Affects of variable and subject sampling on factor pattern recovery</article-title><source>Psychol Methods</source><year>1998</year><volume>3</volume><issue>2</issue><fpage>231</fpage><lpage>251</lpage><pub-id pub-id-type="doi">10.1037/1082-989X.3.2.231</pub-id></nlm-citation></ref><ref id="ref36"><label>36</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>MacCallum</surname><given-names>RC</given-names> </name><name name-style="western"><surname>Widaman</surname><given-names>KF</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>S</given-names> </name><name name-style="western"><surname>Hong</surname><given-names>S</given-names> </name></person-group><article-title>Sample size in factor analysis</article-title><source>Psychol Methods</source><year>1999</year><volume>4</volume><issue>1</issue><fpage>84</fpage><lpage>99</lpage><pub-id 
pub-id-type="doi">10.1037//1082-989X.4.1.84</pub-id></nlm-citation></ref><ref id="ref37"><label>37</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wu</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Luttrell</surname><given-names>I</given-names> </name><name name-style="western"><surname>Feng</surname><given-names>S</given-names> </name><etal/></person-group><article-title>Development and validation of a machine learning, smartphone-based tonometer</article-title><source>Br J Ophthalmol</source><year>2020</year><month>10</month><volume>104</volume><issue>10</issue><fpage>1394</fpage><lpage>1398</lpage><pub-id pub-id-type="doi">10.1136/bjophthalmol-2019-315446</pub-id></nlm-citation></ref><ref id="ref38"><label>38</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Greene</surname><given-names>BR</given-names> </name><name name-style="western"><surname>Premoli</surname><given-names>I</given-names> </name><name name-style="western"><surname>McManus</surname><given-names>K</given-names> </name><name name-style="western"><surname>McGrath</surname><given-names>D</given-names> </name><name name-style="western"><surname>Caulfield</surname><given-names>B</given-names> </name></person-group><article-title>Predicting fall counts using wearable sensors: a novel digital biomarker for Parkinson&#x2019;s disease</article-title><source>Sensors (Basel)</source><year>2021</year><month>12</month><day>22</day><volume>22</volume><issue>1</issue><fpage>54</fpage><pub-id pub-id-type="doi">10.3390/s22010054</pub-id><pub-id pub-id-type="medline">35009599</pub-id></nlm-citation></ref><ref id="ref39"><label>39</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Formstone</surname><given-names>L</given-names> </name><name 
name-style="western"><surname>Huo</surname><given-names>W</given-names> </name><name name-style="western"><surname>Wilson</surname><given-names>S</given-names> </name><name name-style="western"><surname>McGregor</surname><given-names>A</given-names> </name><name name-style="western"><surname>Bentley</surname><given-names>P</given-names> </name><name name-style="western"><surname>Vaidyanathan</surname><given-names>R</given-names> </name></person-group><article-title>Quantification of motor function post-stroke using novel combination of wearable inertial and mechanomyographic sensors</article-title><source>IEEE Trans Neural Syst Rehabil Eng</source><year>2021</year><volume>29</volume><fpage>1158</fpage><lpage>1167</lpage><pub-id pub-id-type="doi">10.1109/TNSRE.2021.3089613</pub-id><pub-id pub-id-type="medline">34129501</pub-id></nlm-citation></ref><ref id="ref40"><label>40</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Holber</surname><given-names>JP</given-names> </name><name name-style="western"><surname>Abebe</surname><given-names>KZ</given-names> </name><name name-style="western"><surname>Huang</surname><given-names>Y</given-names> </name><etal/></person-group><article-title>The relationship between objectively measured step count, clinical characteristics, and quality of life among depressed patients recently hospitalized with systolic heart failure</article-title><source>Psychosom Med</source><year>2022</year><volume>84</volume><issue>2</issue><fpage>231</fpage><lpage>236</lpage><pub-id pub-id-type="doi">10.1097/PSY.0000000000001034</pub-id><pub-id pub-id-type="medline">34724453</pub-id></nlm-citation></ref><ref id="ref41"><label>41</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bizzozero-Peroni</surname><given-names>B</given-names> </name><name 
name-style="western"><surname>D&#x00ED;az-Go&#x00F1;i</surname><given-names>V</given-names> </name><name name-style="western"><surname>Jim&#x00E9;nez-L&#x00F3;pez</surname><given-names>E</given-names> </name><etal/></person-group><article-title>Daily step count and depression in adults: a systematic review and meta-analysis</article-title><source>JAMA Netw Open</source><year>2024</year><month>12</month><day>2</day><volume>7</volume><issue>12</issue><fpage>e2451208</fpage><pub-id pub-id-type="doi">10.1001/jamanetworkopen.2024.51208</pub-id><pub-id pub-id-type="medline">39680407</pub-id></nlm-citation></ref><ref id="ref42"><label>42</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Havlicek</surname><given-names>LL</given-names> </name><name name-style="western"><surname>Peterson</surname><given-names>NL</given-names> </name></person-group><article-title>Robustness of the Pearson correlation against violations of assumptions</article-title><source>Percept Mot Skills</source><year>1976</year><month>12</month><volume>43</volume><issue>3_suppl</issue><fpage>1319</fpage><lpage>1334</lpage><pub-id pub-id-type="doi">10.2466/pms.1976.43.3f.1319</pub-id></nlm-citation></ref><ref id="ref43"><label>43</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zygmont</surname><given-names>C</given-names> </name><name name-style="western"><surname>Smith</surname><given-names>MR</given-names> </name></person-group><article-title>Robust factor analysis in the presence of normality violations, missing data, and outliers: Empirical questions and possible solutions</article-title><source>TQMP</source><year>2014</year><volume>10</volume><issue>1</issue><fpage>40</fpage><lpage>55</lpage><pub-id pub-id-type="doi">10.20982/tqmp.10.1.p040</pub-id></nlm-citation></ref><ref id="ref44"><label>44</label><nlm-citation citation-type="journal"><person-group 
person-group-type="author"><name name-style="western"><surname>Yang</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Liang</surname><given-names>X</given-names> </name></person-group><article-title>Confirmatory factor analysis under violations of distributional and structural assumptions</article-title><source>IJQRE</source><year>2013</year><volume>1</volume><issue>1</issue><fpage>61</fpage><pub-id pub-id-type="doi">10.1504/IJQRE.2013.055642</pub-id></nlm-citation></ref><ref id="ref45"><label>45</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lai</surname><given-names>K</given-names> </name><name name-style="western"><surname>Green</surname><given-names>SB</given-names> </name></person-group><article-title>The problem with having two watches: assessment of fit when RMSEA and CFI disagree</article-title><source>Multivariate Behav Res</source><year>2016</year><volume>51</volume><issue>2-3</issue><fpage>220</fpage><lpage>239</lpage><pub-id pub-id-type="doi">10.1080/00273171.2015.1134306</pub-id><pub-id pub-id-type="medline">27014948</pub-id></nlm-citation></ref><ref id="ref46"><label>46</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Franchignoni</surname><given-names>F</given-names> </name><name name-style="western"><surname>Mora</surname><given-names>G</given-names> </name><name name-style="western"><surname>Giordano</surname><given-names>A</given-names> </name><name name-style="western"><surname>Volanti</surname><given-names>P</given-names> </name><name name-style="western"><surname>Chi&#x00F2;</surname><given-names>A</given-names> </name></person-group><article-title>Evidence of multidimensionality in the ALSFRS-R Scale: a critical appraisal on its measurement properties using Rasch analysis</article-title><source>J Neurol Neurosurg 
Psychiatry</source><year>2013</year><month>12</month><volume>84</volume><issue>12</issue><fpage>1340</fpage><lpage>1345</lpage><pub-id pub-id-type="doi">10.1136/jnnp-2012-304701</pub-id><pub-id pub-id-type="medline">23516308</pub-id></nlm-citation></ref><ref id="ref47"><label>47</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Boothroyd</surname><given-names>L</given-names> </name><name name-style="western"><surname>Dagnan</surname><given-names>D</given-names> </name><name name-style="western"><surname>Muncer</surname><given-names>S</given-names> </name></person-group><article-title>PHQ-9: One factor or two?</article-title><source>Psychiatry Res</source><year>2019</year><month>01</month><volume>271</volume><fpage>532</fpage><lpage>534</lpage><pub-id pub-id-type="doi">10.1016/j.psychres.2018.12.048</pub-id><pub-id pub-id-type="medline">30553100</pub-id></nlm-citation></ref><ref id="ref48"><label>48</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Rosa</surname><given-names>C</given-names> </name><name name-style="western"><surname>Marsch</surname><given-names>LA</given-names> </name><name name-style="western"><surname>Winstanley</surname><given-names>EL</given-names> </name><name name-style="western"><surname>Brunner</surname><given-names>M</given-names> </name><name name-style="western"><surname>Campbell</surname><given-names>ANC</given-names> </name></person-group><article-title>Using digital technologies in clinical trials: current and future applications</article-title><source>Contemp Clin Trials</source><year>2021</year><month>01</month><volume>100</volume><fpage>106219</fpage><pub-id pub-id-type="doi">10.1016/j.cct.2020.106219</pub-id><pub-id pub-id-type="medline">33212293</pub-id></nlm-citation></ref><ref id="ref49"><label>49</label><nlm-citation citation-type="web"><article-title>mPower public researcher 
portal</article-title><source>mPower mobile Parkinson Disease study</source><access-date>2025-10-29</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.synapse.org/Synapse:syn4993293/wiki/247859">https://www.synapse.org/Synapse:syn4993293/wiki/247859</ext-link></comment></nlm-citation></ref><ref id="ref50"><label>50</label><nlm-citation citation-type="web"><article-title>Brighten: bridging research innovations for greater health in technology, emotion, and neuroscience</article-title><source>Brighten Study Public Researcher Portal</source><access-date>2025-10-29</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.synapse.org/Synapse:syn10848316/wiki/548727">https://www.synapse.org/Synapse:syn10848316/wiki/548727</ext-link></comment></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>Description of datasets.</p><media xlink:href="jmir_v27i1e70314_app1.docx" xlink:title="DOCX File, 18 KB"/></supplementary-material><supplementary-material id="app2"><label>Multimedia Appendix 2</label><p>Description of statistical analysis methods.</p><media xlink:href="jmir_v27i1e70314_app2.docx" xlink:title="DOCX File, 99 KB"/></supplementary-material></app-group></back></article>