<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id><journal-id journal-id-type="publisher-id">jmir</journal-id><journal-id journal-id-type="index">1</journal-id><journal-title>Journal of Medical Internet Research</journal-title><abbrev-journal-title>J Med Internet Res</abbrev-journal-title><issn pub-type="epub">1438-8871</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v28i1e80450</article-id><article-id pub-id-type="doi">10.2196/80450</article-id><article-categories><subj-group subj-group-type="heading"><subject>Original Paper</subject></subj-group></article-categories><title-group><article-title>Electrocardiogram-Based Mental Stress Detection Amid Everyday Activities Using Machine Learning: Model Development and Validation Study</article-title></title-group><contrib-group><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Uendes</surname><given-names>Buelent</given-names></name><degrees>MSc</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Antonides</surname><given-names>Alex</given-names></name><degrees>MSc</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>van de Ven</surname><given-names>Sjors</given-names></name><degrees>MSc</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>van der Mee</surname><given-names>Denise 
Johanna</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>de Geus</surname><given-names>Eco</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Hoogendoorn</surname><given-names>Mark</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib></contrib-group><aff id="aff1"><institution>Department of Computer Science, Vrije Universiteit Amsterdam</institution><addr-line>De Boelelaan 1111</addr-line><addr-line>Amsterdam</addr-line><country>The Netherlands</country></aff><aff id="aff2"><institution>Department of Biological Psychology, Vrije Universiteit Amsterdam</institution><addr-line>Amsterdam</addr-line><country>The Netherlands</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Coristine</surname><given-names>Andrew</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Li</surname><given-names>Jieni</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Shaffi</surname><given-names>Shamnad Mohamed</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Buelent Uendes, MSc, Department of Computer Science, Vrije Universiteit Amsterdam, De Boelelaan 1111, Amsterdam, 1081 HV, The Netherlands, 49 15221457090; <email>b.uendes@vu.nl</email></corresp></author-notes><pub-date pub-type="collection"><year>2026</year></pub-date><pub-date pub-type="epub"><day>7</day><month>4</month><year>2026</year></pub-date><volume>28</volume><elocation-id>e80450</elocation-id><history><date date-type="received"><day>10</day><month>07</month><year>2025</year></date><date date-type="rev-recd"><day>22</day><month>01</month><year>2026</year></date><date 
date-type="accepted"><day>23</day><month>01</month><year>2026</year></date></history><copyright-statement>&#x00A9; Buelent Uendes, Alex Antonides, Sjors van de Ven, Denise Johanna van der Mee, Eco de Geus, Mark Hoogendoorn. Originally published in the Journal of Medical Internet Research (<ext-link ext-link-type="uri" xlink:href="https://www.jmir.org">https://www.jmir.org</ext-link>), 7.4.2026. </copyright-statement><copyright-year>2026</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research (ISSN 1438-8871), is properly cited. The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://www.jmir.org/">https://www.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://www.jmir.org/2026/1/e80450"/><abstract><sec><title>Background</title><p>Frequent, sustained stress is linked to poor health and requires monitoring for early intervention. Electrocardiograms (ECG) are promising biomarkers because they can be recorded noninvasively and continuously using wearable devices. 
However, tracking stress with ECG is challenging because daily activities elicit responses similar to mental stress (MS), and various mental stimuli that individuals encounter complicate the use of machine learning (ML) models trained on a limited set of stressors.</p></sec><sec><title>Objective</title><p>We (1) evaluated the ability of ML models to distinguish MS episodes from a composite &#x201C;no-stress&#x201D; background, including rest and low- to moderate-intensity activities; (2) assessed their generalizability to new stressors and participants; and (3) tested robustness to lower sampling rates and fewer features, to explore their suitability for lightweight wearables.</p></sec><sec sec-type="methods"><title>Methods</title><p>We used a comprehensive ECG dataset sampled at 1000 hertz from 127 participants who underwent various mental stressors and engaged in diverse physical activities. A 30-second window was used to extract 55 features from time, frequency, nonlinear, and morphological domains. We trained a logistic regression (LR) model and an extreme gradient boosting (XGBoost) model, splitting the data into 60/20/20 for training, validation, and testing. Shapley additive explanation values were computed to explain model predictions. Additional analyses included leave-one-stressor-out; downsampling to 500, 250, and 125 hertz; a time-window sensitivity analysis; and reducing the number of features to as few as 5.</p></sec><sec sec-type="results"><title>Results</title><p>XGBoost achieved an area under the receiver operating characteristic curve (AUROC) of 0.741 (95% CI 0.701&#x2010;0.783) and an area under the precision-recall curve (AUPRC) of 0.706 (95% CI 0.658&#x2010;0.753), compared with 0.724 (95% CI 0.678&#x2010;0.772) and 0.691 (95% CI 0.639&#x2010;0.742) for LR. 
The mean performance difference between XGBoost and LR was 0.017 for AUROC (95% CI 0.001&#x2010;0.032) and 0.015 for AUPRC (95% CI&#x202F;&#x2212;0.001 to 0.037; clustered bootstrap analysis using 2000 participant-level resamples), suggesting that LR performs comparably to the nonlinear XGBoost model. Both models were robust to downsampling and feature reduction (10 features retained &#x003E;93% of performance). Extending the analysis window to 60 seconds improved model performance across all sampling rates, highlighting a trade-off between rapid detection and overall performance. When evaluating discrimination from physical activity, models achieved acceptable specificity for light physical activity (XGBoost: 0.787; LR: 0.794) but poor specificity for moderate physical activity (XGBoost: 0.418; LR: 0.444). Both models generalized to most unseen stressors, although performance varied across stressors, with limited transfer to the social-evaluative stressor. Feature importance analysis revealed fuzzy entropy and frequency-based features as key predictors.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>ML models can detect MS with high sensitivity and remain robust to lower sampling rates and fewer features. Generalization to novel stressors was stressor-dependent. 
Importantly, our results highlight challenges in distinguishing stress-related cardiac responses from those caused by physical exertion, revealing critical limitations of single-sensor ECG approaches for MS detection.</p></sec></abstract><kwd-group><kwd>mental stress</kwd><kwd>machine learning</kwd><kwd>electrocardiography</kwd><kwd>ECG</kwd><kwd>stress detection</kwd><kwd>generalizability</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><sec id="s1-1"><title>Background</title><p>Mental stress (MS), an ever-present aspect of life, occurs when external demands exceed an individual&#x2019;s available physiological and psychological coping resources [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref2">2</xref>]. While short-term exposure to stress may enhance focus and performance [<xref ref-type="bibr" rid="ref3">3</xref>], frequent or prolonged exposure can adversely affect health, contributing to psychiatric conditions [<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref5">5</xref>] and cardiovascular disease [<xref ref-type="bibr" rid="ref6">6</xref>]. Consequently, accurate and timely detection followed by effective stress management is essential to mitigate these adverse health outcomes.</p><p>Traditional stress assessment methods, such as cortisol analysis [<xref ref-type="bibr" rid="ref7">7</xref>], are burdensome. In contrast, self-report questionnaires, such as the Perceived Stress Scale [<xref ref-type="bibr" rid="ref8">8</xref>], administered retrospectively or through ecological momentary assessment, offer greater convenience but are limited in temporal granularity [<xref ref-type="bibr" rid="ref9">9</xref>]. Moreover, they may be susceptible to recall and reporting bias and less reliable among individuals with alexithymia. 
Effective stress detection, by contrast, should be minimally invasive and support continuous monitoring to capture early warning signs of stress [<xref ref-type="bibr" rid="ref10">10</xref>].</p><p>Given that stress modulates the activity of the autonomic nervous system (ANS) [<xref ref-type="bibr" rid="ref11">11</xref>], physiological biomarkers such as the electrocardiogram (ECG) and electrodermal activity have been widely proposed for automated stress monitoring. Furthermore, these signals are increasingly accessible through modern wearable devices, making real-time stress tracking more feasible. However, physiological signals often exhibit subtle and complex patterns that can be difficult to analyze and interpret using traditional statistical methods [<xref ref-type="bibr" rid="ref1">1</xref>].</p></sec><sec id="s1-2"><title>Objectives</title><p>Machine learning (ML) has emerged as a powerful tool for analyzing high-dimensional data, excelling at uncovering patterns and complex relationships [<xref ref-type="bibr" rid="ref12">12</xref>]. Consequently, ML has increasingly been used in recent studies to detect stress responses from the biomarkers mentioned above [<xref ref-type="bibr" rid="ref13">13</xref>-<xref ref-type="bibr" rid="ref16">16</xref>]. However, most existing studies exhibit at least one of the following limitations: (1) small cohorts; (2) a narrow range of mental stressors investigated; and (3) restriction of MS detection to seated baseline (BL) conditions. This restriction leaves it unclear whether ML models can distinguish stress-induced ECG signals from those triggered by everyday movements, which often produce similar physiological responses. Moreover, because individuals encounter a wide range of novel stressors in daily life, models must generalize to stimuli absent from their training data. This aspect has been largely overlooked in prior work. 
The few studies that investigate generalization [<xref ref-type="bibr" rid="ref17">17</xref>-<xref ref-type="bibr" rid="ref19">19</xref>] typically train their ML models on one dataset and test them on another. Although informative, this approach conflates differences in participant demographics, sensor hardware, and stressor types, making it hard to isolate which factor drives any performance drop.</p><p>In this study, we address these gaps by developing and rigorously evaluating two ML models, namely logistic regression (LR) and extreme gradient boosting (XGBoost), on a large ECG dataset sampled at 1000 Hz. The dataset, collected from a controlled laboratory experiment at the Vrije Universiteit Amsterdam and published by van der Mee et al [<xref ref-type="bibr" rid="ref20">20</xref>], included 127 participants who performed both diverse mental-stress tasks and everyday activities (eg, dishwashing and walking). In this study, we define &#x201C;mental stress&#x201D; as the physiological response elicited by cognitively or emotionally challenging tasks. Subsequently, we formulated a binary MS versus no-stress classification task, where the no-stress condition includes seated BLs, recovery periods, and low-to-moderate-intensity physical activities. This design not only improves ecological validity but also tests each model&#x2019;s ability to distinguish stress&#x2010;induced ECG changes from those driven by physical exertion, without relying on additional sensors (eg, accelerometers). To assess generalization, we evaluate our models on unseen participants and on &#x201C;novel&#x201D; stressors, using a leave-one-stressor-out protocol, where we withhold each mental stressor during training and assess model performance on this specific stressor. We further investigate the robustness of our models to lower sampling frequency and the reduction of the feature set. 
Together, these experiments provide key insights for transitioning ECG-based MS detection from laboratory settings to real-world contexts.</p></sec></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Overview</title><p>The structure of the following section is in line with the TRIPOD+AI (Transparent Reporting of a Multivariable Prediction Model for Individual Prognosis or Diagnosis&#x2013;Artificial Intelligence) statement [<xref ref-type="bibr" rid="ref21">21</xref>,<xref ref-type="bibr" rid="ref22">22</xref>]. Sections of the guideline not applicable to this research were omitted for brevity.</p></sec><sec id="s2-2"><title>Data</title><p>This study is a retrospective analysis of pseudonymized data collected from a controlled laboratory experiment conducted at the Vrije Universiteit Amsterdam and published by van der Mee et al [<xref ref-type="bibr" rid="ref20">20</xref>]. The experimental conditions are shown in <xref ref-type="table" rid="table1">Table 1</xref>. In addition to ECG signals, subjective affect was assessed after each experimental condition using self-reported measures of positive and negative affect, as measured by the Maastricht Questionnaire [<xref ref-type="bibr" rid="ref23">23</xref>]. We refer to the original study by van der Mee et al [<xref ref-type="bibr" rid="ref20">20</xref>] for detailed explanations of the experimental procedure.</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Experimental conditions and their associated labels used in our study. The laboratory protocol consisted of mental stress (MS) tasks, physical activities at varying intensities, baseline (BL) rest, and recovery periods, each labeled for binary classification as MS or nonstress (BL, low physical activity [LPA], and moderate physical activity [MPA]). 
A total of 127 healthy adults participated in the experiments conducted in the Netherlands (data collected from 2017 to 2019).</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Experimental condition</td><td align="left" valign="bottom">Duration (min)</td><td align="left" valign="bottom">Label</td></tr></thead><tbody><tr><td align="left" valign="top">Standing</td><td align="char" char="." valign="top">3</td><td align="left" valign="top">LPA</td></tr><tr><td align="left" valign="top">Sitting</td><td align="char" char="." valign="top">3</td><td align="left" valign="top">BL</td></tr><tr><td align="left" valign="top">Tone avoidance</td><td align="char" char="." valign="top">4</td><td align="left" valign="top">MS</td></tr><tr><td align="left" valign="top">Recovery (sitting)</td><td align="char" char="." valign="top">2</td><td align="left" valign="top">BL</td></tr><tr><td align="left" valign="top">Short sing-a-song stress test (anticipatory)</td><td align="char" char="." valign="top">1</td><td align="left" valign="top">MS</td></tr><tr><td align="left" valign="top">Recovery (sitting)</td><td align="char" char="." valign="top">2</td><td align="left" valign="top">BL</td></tr><tr><td align="left" valign="top">Paced auditory serial addition test</td><td align="char" char="." valign="top">4</td><td align="left" valign="top">MS</td></tr><tr><td align="left" valign="top">Recovery (sitting)</td><td align="char" char="." valign="top">2</td><td align="left" valign="top">BL</td></tr><tr><td align="left" valign="top">Raven&#x2019;s Progressive Matrices</td><td align="char" char="." valign="top">4</td><td align="left" valign="top">MS</td></tr><tr><td align="left" valign="top">Walking at a natural pace</td><td align="char" char="." valign="top">2</td><td align="left" valign="top">MPA</td></tr><tr><td align="left" valign="top">Recovery (standing)</td><td align="char" char="." 
valign="top">2</td><td align="left" valign="top">LPA</td></tr><tr><td align="left" valign="top">Dishwashing</td><td align="char" char="." valign="top">2</td><td align="left" valign="top">MPA</td></tr><tr><td align="left" valign="top">Vacuum cleaning</td><td align="char" char="." valign="top">2</td><td align="left" valign="top">MPA</td></tr><tr><td align="left" valign="top">Recovery (sitting)</td><td align="char" char="." valign="top">2</td><td align="left" valign="top">BL</td></tr><tr><td align="left" valign="top">Tone avoidance (repeat)</td><td align="char" char="." valign="top">4</td><td align="left" valign="top">MS</td></tr><tr><td align="left" valign="top">Recovery (sitting)</td><td align="char" char="." valign="top">2</td><td align="left" valign="top">BL</td></tr><tr><td align="left" valign="top">Paced auditory serial addition test (repeat)</td><td align="char" char="." valign="top">4</td><td align="left" valign="top">MS</td></tr><tr><td align="left" valign="top">Recovery (sitting)</td><td align="char" char="." valign="top">3</td><td align="left" valign="top">BL</td></tr></tbody></table></table-wrap></sec><sec id="s2-3"><title>Participants</title><p>As outlined in the study by van der Mee et al [<xref ref-type="bibr" rid="ref20">20</xref>], participants were required to be Dutch-speaking, employed or enrolled in school, and aged 18-48 years. Exclusion criteria included a BMI above 30, high cholesterol, diabetes, liver or thyroid disease, and use of medications that affect the ANS, such as antidepressants or anticholinergics. The final sample included 127 participants (71 female and 56 male) with a mean age of 23.3 (SD 5.6) years. We refer the reader to the original study for additional details on recruitment and population. 
The data used in this analysis were pseudonymized.</p></sec><sec id="s2-4"><title>Outcome</title><p>For this research, the different experimental conditions, as shown in <xref ref-type="table" rid="table1">Table 1</xref>, were categorized into the following labels:</p><list list-type="bullet"><list-item><p>BL: This group includes conditions in which participants were primarily resting, such as sitting and recovery periods.</p></list-item><list-item><p>MS: This group includes conditions designed to induce cognitive or emotional stress, such as the tone avoidance (TA) reaction time task, the sing-a-song stress test (SSST), the paced auditory serial addition test (PASAT), and the Raven&#x2019;s Progressive Matrices (RAVEN) test. For the SSST, the labeled stress segments correspond to the anticipatory phase before singing, that is, the period during which participants are informed they will be asked to sing but have not yet begun.</p></list-item><list-item><p>Low physical activity (LPA): Standing, including recovery periods that involve standing, is classified as LPA.</p></list-item><list-item><p>Moderate physical activity (MPA): Activities involving moderate physical exertion, such as walking at a moderate pace, washing dishes, and vacuuming.</p></list-item></list></sec><sec id="s2-5"><title>Data Preparation</title><p>ECG signals are susceptible to various forms of noise and artifacts, which can obscure underlying cardiac activity and hinder accurate analysis. Thus, data preprocessing is crucial to ensure the quality and consistency of the input for the subsequent analysis.</p><p>In the first stage of data preprocessing, ECG segments unrelated to the laboratory conditions of interest were removed. These segments included the first and last minutes of the recording, which captured the experimental setup and ECG lead removal, as well as any designated &#x201C;short break&#x201D; conditions. 
Compared with the original study, we excluded high-intensity physical activities (eg, treadmill running and stair climbing), as these activities are easily distinguishable from MS by, for instance, considering the maximum heart rate (HR) and could inflate reported model performance.</p><p>In the subsequent step, a 0.5 Hz high-pass Butterworth filter (order=5) was used to remove baseline wander and slow drifts in the signal baseline caused by respiration, movement, or electrode issues, and a power line filter (50 Hz) was applied to attenuate noise from electrical power sources. Both filtering steps were implemented using the preprocessing pipeline provided by the NeuroKit2 package (version 0.2.7) [<xref ref-type="bibr" rid="ref24">24</xref>]. While filtering addresses signal quality issues, some segments may remain unreliable due to factors such as temporary electrode detachment caused by leads accidentally falling off during the experiment. To identify and address these segments, a signal quality index (SQI) was computed for each QRS complex. Segments with an SQI below 25% were deemed unreliable and discarded. This threshold was chosen based on visual inspection, where segments with SQI values below this level often exhibited flat lines, rendering the identification of key peaks unreliable.</p></sec><sec id="s2-6"><title>Predictors</title><sec id="s2-6-1"><title>Overview</title><p>For each participant, the data were segmented using a sliding-window approach, and features were subsequently calculated. We used a 30-second window size with a step size of 10 seconds. This window length represents the minimum duration demonstrated in prior work to yield reliable heart rate variability (HRV) estimates [<xref ref-type="bibr" rid="ref25">25</xref>], yet remains short enough for near-real-time applications. 
Furthermore, our window size aligns with previous research [<xref ref-type="bibr" rid="ref26">26</xref>].</p><p>Following R-peak detection and waveform delineation of the ECG signal, various features were extracted based on prior research, as described in the following subsections.</p></sec><sec id="s2-6-2"><title>Time-Domain Features</title><p>For each time window, we extracted the normal-to-normal (NN) interval series, converted it to an instantaneous HR series (in beats per minute [BPM]), and then computed the mean, SD, minimum, and maximum of that series. These measures capture the central tendency and dispersion of the HR, which typically rises during physical exertion and in response to acute mental stressors [<xref ref-type="bibr" rid="ref27">27</xref>].</p><p>HRV features are essential markers for ANS activity [<xref ref-type="bibr" rid="ref25">25</xref>] and have consequently been linked to stress in prior research [<xref ref-type="bibr" rid="ref28">28</xref>-<xref ref-type="bibr" rid="ref30">30</xref>]. Therefore, we calculated the average value of the NN intervals (AVNN: average normal-to-normal), the SD of the NN intervals (SDNN), as well as the root-mean-square of successive differences (RMSSD) for each time window. We also included the ratio between the SDNN and the RMSSD, as it serves as a time-domain surrogate for the low-frequency to high-frequency power ratio, a standard marker of ANS activity [<xref ref-type="bibr" rid="ref31">31</xref>,<xref ref-type="bibr" rid="ref32">32</xref>].</p><p>In addition to features like AVNN and SDNN, we derived several complementary time&#x2010;domain features to capture both absolute and relative variability in NN intervals. 
These include NN20 and NN50, the counts of consecutive interval differences exceeding 20 milliseconds and 50 milliseconds, and their normalized counterparts, PNN20 and PNN50, as these features are helpful for stress monitoring [<xref ref-type="bibr" rid="ref33">33</xref>,<xref ref-type="bibr" rid="ref34">34</xref>].</p><p>To assess variability relative to mean HR, we calculated CVNN (SDNN/AVNN) and CVSD (RMSSD/AVNN). Finally, we quantified dispersion beyond SDs by computing the IQR of NN intervals and the median absolute difference of NN intervals.</p></sec><sec id="s2-6-3"><title>Frequency-Domain Features</title><p>To assess the spectral characteristics of the ECG signal, Welch&#x2019;s method was used to estimate the power spectral density across three frequency bands: high frequency (HF; 0.15&#x2010;0.40 Hz), very high frequency (VHF; 0.40&#x2010;0.50 Hz), and ultra-high frequency (UHF; 0.50&#x2010;1.00 Hz). Although prior studies commonly analyze only frequencies up to the HF band (0.40 Hz), this may be restrictive during stress, as acute psychological stress can elevate respiratory rate [<xref ref-type="bibr" rid="ref35">35</xref>-<xref ref-type="bibr" rid="ref37">37</xref>], potentially shifting the respiratory-driven modulation of R-to-R intervals above the HF band. Supporting this, Hernando et al [<xref ref-type="bibr" rid="ref38">38</xref>] showed that respiratory rate increases and becomes less stable during stress, and that incorporating respiratory information improves the characterization of the stress-related autonomic response.</p><p>Motivated by this evidence, we included the VHF band (0.40&#x2010;0.50 Hz), as implemented in the NeuroKit2 toolbox [<xref ref-type="bibr" rid="ref24">24</xref>], to capture high-frequency variability potentially associated with stress-related changes beyond the conventional HF range. 
We additionally investigated the UHF band (0.50&#x2010;1.00 Hz) in an exploratory manner, as this range may reflect high-frequency variability components that differ systematically across conditions. Lower-frequency bands, such as very low frequency (0.0033&#x2010;0.04 Hz), were not considered in this study, as their estimation would require longer time windows to be accurately captured [<xref ref-type="bibr" rid="ref31">31</xref>].</p><p>Following the approach taken by Schmidt et al [<xref ref-type="bibr" rid="ref13">13</xref>], we computed the absolute spectral power within each predefined frequency band. Then we expressed each band&#x2019;s power as a fraction of the total spectral power to quantify its proportional contribution. We also extracted the minimum, maximum, mean, SD, and entropy for each frequency band in line with the approach taken by Karthikeyan et al [<xref ref-type="bibr" rid="ref39">39</xref>].</p></sec><sec id="s2-6-4"><title>Nonlinear Features</title><p>Given the inherent nonlinearity and complexity of the MS response [<xref ref-type="bibr" rid="ref40">40</xref>], we included several nonlinear indices of HRV derived from NN intervals to capture the subtle fluctuations and irregularities in ANS activity. Following the approach taken in Tanev et al [<xref ref-type="bibr" rid="ref41">41</xref>], we included the approximate entropy [<xref ref-type="bibr" rid="ref42">42</xref>,<xref ref-type="bibr" rid="ref43">43</xref>] and fuzzy entropy [<xref ref-type="bibr" rid="ref44">44</xref>]. We excluded sample entropy [<xref ref-type="bibr" rid="ref43">43</xref>] because approximate entropy is better suited to shorter time windows [<xref ref-type="bibr" rid="ref45">45</xref>]. 
Furthermore, as a derivative of sample entropy, fuzzy entropy has been shown to outperform sample entropy [<xref ref-type="bibr" rid="ref46">46</xref>].</p><p>Using Poincar&#x00E9; plot analysis, we extracted the SD1 and SD2 features, along with their ratio, to gain insights into the dynamics of short-term and long-term HRV [<xref ref-type="bibr" rid="ref45">45</xref>]. Furthermore, heart rate asymmetry (HRA), that is, the asymmetrical distribution between accelerating and decelerating NN intervals, has been shown to reflect MS [<xref ref-type="bibr" rid="ref47">47</xref>]. Consequently, we extracted the area index [<xref ref-type="bibr" rid="ref48">48</xref>] as a feature of the HRA. In contrast to other HRA measures, such as Guzik&#x2019;s slope or Porta&#x2019;s index, the area index has been reported to exhibit fewer variations in short-term heartbeat intervals [<xref ref-type="bibr" rid="ref48">48</xref>].</p><p>Because the recurrence quantification analysis has been shown to reflect the activity of the sympathetic and parasympathetic nervous system [<xref ref-type="bibr" rid="ref49">49</xref>,<xref ref-type="bibr" rid="ref50">50</xref>], we extracted three features from the recurrence quantification analysis plot: average length, longest length, and the entropy of the vertical lines (<italic>W</italic>, <italic>W</italic><sub>max</sub>, and <italic>W</italic><sub>En</sub>).</p><p>To assess short-term correlations in the ECG signal, we used detrended fluctuation analysis and extracted the &#x03B1;<sub>1</sub> value [<xref ref-type="bibr" rid="ref51">51</xref>], which is moderated by sympathetic activity [<xref ref-type="bibr" rid="ref52">52</xref>,<xref ref-type="bibr" rid="ref53">53</xref>]. 
In contrast to the &#x03B1;<sub>1</sub> value, which requires 3 to 16 beats for estimation, the &#x03B1;<sub>2</sub> value needs 16 to 64 heartbeats [<xref ref-type="bibr" rid="ref54">54</xref>], which may be problematic given our short 30-second analysis window and the variability in HR across experimental conditions. For this reason, we excluded the &#x03B1;<sub>2</sub> from our feature set.</p><p>Last, we included the HR fragmentation features proposed by Costa et al [<xref ref-type="bibr" rid="ref55">55</xref>], that is, percentage of short segments (PSS), inverse average length of segments (IALS), percentage of inflection points, and percentage of NN intervals in alternation segments, as these biomarkers have been proposed to capture the functionality of the HR control system.</p></sec><sec id="s2-6-5"><title>Morphological Features</title><p>Beyond linear and nonlinear dynamics, morphological features (ie, characteristics that describe the shape, size, or timing of ECG waves) were included to capture further nuances in cardiac response during MS. In particular, we focused on T-wave alternans (TWA), a beat-to-beat variation in the amplitude of the T-wave that reflects subtle alterations in ventricular repolarization and which has been identified as a potential marker of MS [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref56">56</xref>].</p><p>While the spectral method and the modified moving average (MMA) method are both widely used for TWA analysis, the latter was used to extract the TWA feature, as the MMA does not require participants to achieve a HR of 105 to 110 BPM [<xref ref-type="bibr" rid="ref57">57</xref>]. This is crucial, as participants in the MS or seated BL conditions may experience HRs below this threshold (<xref ref-type="fig" rid="figure1">Figure 1</xref>).</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>Heart rate distributions across experimental conditions (127 total participants). 
Kernel density plots show mean heart rate (BPM) during sitting, recovery (sitting), low physical activity, mental stress, and moderate physical activity. Overlapping distributions across experimental conditions highlight the difficulty of discriminating between mental stress and nonstress conditions using mean heart rate alone. BPM: beats per minute.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="jmir_v28i1e80450_fig01.png"/></fig><p>Using the MMA approach [<xref ref-type="bibr" rid="ref58">58</xref>], we delineated and extracted the T-wave from each 30-second ECG window, numbered them sequentially, and split them into odd (1, 3, &#x2026;) and even (2, 4, &#x2026;) beats. We then applied an MMA to each group and defined TWA as the maximum amplitude difference between the odd- and even-beat templates.</p><p>The complete set of features used in this study is presented in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> [<xref ref-type="bibr" rid="ref55">55</xref>]. We excluded any 30-second window with fewer than 12 detected R peaks or with a mean HR outside the physiologically plausible range of 40&#x2010;220&#x202F;BPM during feature calculation. 
This exclusion removed 0.9% of all time windows.</p><p>Models were trained using ECG-derived features only to test whether ECG alone is sufficient for stress detection and to enable deployment on any wearable device without requiring personal or demographic data.</p></sec></sec><sec id="s2-7"><title>Sample Size</title><p>As illustrated in <xref ref-type="table" rid="table2">Table 2</xref>, the 30-second sliding-window approach with a 10-second shift yielded 31,408 observations.</p><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Distribution of electrocardiogram windows across experimental conditions for mental stress (MS) detection (31,408 thirty-second windows from 127 participants; 10-second sliding-window shift). Windows were labeled as MS or nonstress conditions: baseline (BL) rest, low physical activity (LPA), or moderate physical activity (MPA).</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Experimental condition</td><td align="left" valign="bottom">Sample size</td><td align="left" valign="bottom">Overall percentage (%)</td></tr></thead><tbody><tr><td align="left" valign="top">BL</td><td align="left" valign="top">10,140</td><td align="left" valign="top">32.28</td></tr><tr><td align="left" valign="top">LPA</td><td align="left" valign="top">3222</td><td align="left" valign="top">10.26</td></tr><tr><td align="left" valign="top">MPA</td><td align="left" valign="top">3703</td><td align="left" valign="top">11.79</td></tr><tr><td align="left" valign="top">MS</td><td align="left" valign="top">14,343</td><td align="left" valign="top">45.68</td></tr><tr><td align="left" valign="top">Total</td><td align="left" valign="top">31,408</td><td align="left" valign="top">100.00</td></tr></tbody></table></table-wrap></sec><sec id="s2-8"><title>Missing Data</title><p>Due to the controlled, high-quality conditions of the laboratory-based ECG data collection and the preprocessing pipelines used in 
this study, missing values were scarce. Out of 31,408 analysis windows (<xref ref-type="table" rid="table2">Table 2</xref>), only 50 (0.16%) windows contained missing data, all of which were attributed to the TWA feature, which requires an adequate number of detected T-peaks for reliable computation. We imputed these missing values using a k-nearest neighbors approach with a k value of 5, where neighbors were selected using Euclidean distance across all 55 standardized (as described below) features, and the missing values were imputed using the mean values of the 5 nearest neighbors. Both the imputer and the scaler were fitted exclusively on the training set to prevent data leakage.</p></sec><sec id="s2-9"><title>Analytical Methods</title><sec id="s2-9-1"><title>ML Models</title><p>In this study, we trained an LR with L2 regularization and a nonlinear tree-based model, that is, an XGBoost [<xref ref-type="bibr" rid="ref59">59</xref>] model. While the LR can only capture linear relationships, its inherent interpretability offers an advantage, especially in the health care domain, where interpretability is a key requirement [<xref ref-type="bibr" rid="ref60">60</xref>]. By contrast, we selected a tree-based model as the nonlinear approach, given its widespread use in ECG-based stress and affect detection [<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref61">61</xref>-<xref ref-type="bibr" rid="ref63">63</xref>]. Specifically, we selected XGBoost because gradient-boosting methods have demonstrated strong performance across various tabular benchmarks, matching or surpassing neural network architectures [<xref ref-type="bibr" rid="ref64">64</xref>,<xref ref-type="bibr" rid="ref65">65</xref>]. By evaluating both ML models side-by-side, we can directly assess the trade-off between more interpretable linear models and more complex ones in the context of ECG-based MS detection. 
To confirm the stability of the results, we also included results from a random forest (RF) model to assess robustness.</p></sec><sec id="s2-9-2"><title>Model Development</title><p>We partitioned participants into training (75/127 participants, 60%), validation (26/127 participants, 20%), and test (26/127 participants, 20%) cohorts. For continuous features, we used z-standardization, whereas for discrete features (NN20, NN50, and <italic>W</italic><sub>max</sub>), we applied min-max normalization across the training dataset. We used global rather than participant-specific feature normalization, as global normalization enables assessment of whether ML models can generalize to unseen individuals without requiring per-user calibration data. Hyperparameters for the ML models (LR, XGBoost, and RF) were optimized using Bayesian optimization with Gaussian processes (Optuna version 4.2.1; Preferred Networks, Inc) over 25 iterations; the full hyperparameter tuning ranges and selected values are detailed in <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>.</p></sec><sec id="s2-9-3"><title>Model Evaluation</title><p>We quantified model performance using the area under the receiver operating characteristic curve (AUROC) and the area under the precision-recall curve (AUPRC)&#x2014;the former capturing overall class separation, while the latter emphasizes stress detection performance directly, which is of particular importance in our study. In line with [<xref ref-type="bibr" rid="ref66">66</xref>], we interpret 0.60&#x2010;0.69 as moderate, 0.70&#x2010;0.79 as acceptable, 0.80&#x2010;0.89 as good, and &#x2265;0.90 as excellent. To evaluate how the ML models would perform in practice, we also report the <italic>F</italic><sub>1</sub>-score, as well as sensitivity and specificity. For the latter metrics, we used the threshold that maximized the <italic>F</italic><sub>1</sub>-score on the validation dataset. 
We computed 95% CIs for all metrics using participant-level cluster bootstrapping (2000 bootstrap samples), in which each bootstrap sample resampled participants within the test dataset with replacement and included all corresponding observations for each selected participant.</p></sec><sec id="s2-9-4"><title>Sampling-Rate Robustness</title><p>Our ECG recordings were acquired at 1000 Hz, which exceeds the rates of commonly used datasets [<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref61">61</xref>,<xref ref-type="bibr" rid="ref67">67</xref>]. Using the NeuroKit resampling method, we downsampled our ECG signal to 500, 250, and 125 Hz. This procedure quantifies the effects of reduced temporal resolution on model performance, facilitates direct comparison with earlier work, and evaluates the feasibility for consumer-grade devices, such as the Hexoskin Proshirt (Carr&#x00E9; Technologies Inc) [<xref ref-type="bibr" rid="ref68">68</xref>], which operates at lower sampling rates. Furthermore, investigating whether the performance of MS detection is affected by the sampling rate has important implications and considerations for the design of memory-efficient devices.</p></sec><sec id="s2-9-5"><title>Time-Window Robustness</title><p>To investigate the trade-off between the desire to capture MS in near-real-time (ie, shorter time windows) and the requirement to have sufficiently long windows to generate reliable ECG features, we compared model performance using our standard 30-second segments against extended 60-second segments. Importantly, we used a 20-second time shift for the latter to maintain a constant overall window overlap across both settings. 
This setup allows us to subsequently investigate how the window size affects model performance.</p></sec><sec id="s2-9-6"><title>Model Explainability and Calibration</title><p>To explain the predictions of our ML models, we computed Shapley Additive Explanations (SHAP) [<xref ref-type="bibr" rid="ref69">69</xref>] values. SHAP values are based on a game-theoretic approach to calculate a feature&#x2019;s importance concerning the overall model prediction. For the LR model, we report, in addition to the SHAP values, the model coefficients.</p><p>Model calibration (ie, how well the provided model probabilities align with the actual probabilities of the outcomes) is particularly critical in the health care domain [<xref ref-type="bibr" rid="ref66">66</xref>,<xref ref-type="bibr" rid="ref70">70</xref>,<xref ref-type="bibr" rid="ref71">71</xref>]. We assess calibration using calibration curves, the Brier score [<xref ref-type="bibr" rid="ref72">72</xref>], and the Brier skill score. To obtain the latter, we compare the Brier scores of our ML models to those of a simple majority classifier. These analyses enable us to evaluate not only the predictive performance but also the reliability of the probabilities provided by our ML models. Following [<xref ref-type="bibr" rid="ref73">73</xref>], we consider a model with a Brier score &#x003C;0.20 to be well-calibrated. We further use isotonic regression to post hoc calibrate the model probabilities.</p></sec><sec id="s2-9-7"><title>Parsimony</title><p>As previously outlined, and motivated by prior research, we engineered a total of 55 features spanning time, frequency, nonlinear, and morphological domains to capture physiological responses to MS. However, manual feature generation is a time-consuming process. To quantify the trade-off between model simplicity and predictive performance, we evaluated model performance using progressively smaller feature sets. 
We applied forward selection to choose subsets of 5, 10, and 20 features by iteratively adding features that maximally improved the overall model performance, while retraining each model at each step of the feature selection process. Compared with alternative feature selection methods such as principal component analysis [<xref ref-type="bibr" rid="ref74">74</xref>] or mutual information gain, which calculates dependency between the predictor and outcome variable, the forward-selection method directly optimizes for predictive performance during the feature selection process by iteratively adding features that provide the greatest incremental benefit. This process inherently accounts for feature dependencies, as each selection is made conditional on the features already included. Our progressive reduction from 55 to 5 features allows us to quantify the trade-off between model simplicity and performance, while evaluating the extent to which a compact feature subset can maintain classification performance. We note that the forward selection process was performed on a single validation split (see section &#x201C;Model Development&#x201D;). The specific feature subset composition may therefore vary across different validation splits. However, the relatively large sample size of our study (127 participants; 31,408 windows) may partially mitigate, though not fully eliminate, the risk of overfitting to a specific data split. Nevertheless, we caution against overinterpreting the feature subsets.</p></sec><sec id="s2-9-8"><title>Generalizability Across Stressors</title><p>Although our dataset encompasses a variety of different mental stressors (social stressors, cognitive stressors, and active coping), individuals can face a far wider variety of stressors in their daily lives. To assess whether our models learn a general MS pattern and can generalize to unseen stimuli, we used a leave-one-stressor-out scheme. 
For each stressor category, we trained models on all but the held-out mental stressor and tested their performance on the unseen stimulus. This analysis can thereby give valuable insights into our model&#x2019;s ability to generalize to novel stress types, a critical requirement for deploying ML for MS detection to ambulatory settings.</p><p>We implemented the analysis in Python (version 3.11) and used artificial intelligence tools to accelerate the programming process. Importantly, all output was carefully reviewed, and the code was double-checked and tested for correctness. We release the source code used for this study publicly on GitHub [<xref ref-type="bibr" rid="ref75">75</xref>].</p></sec></sec><sec id="s2-10"><title>Class Imbalance</title><p>Although the class distribution between the MS and nonstress states (BL, LPA, and MPA) is relatively balanced (45.68% MS vs 54.32% nonstress; <xref ref-type="table" rid="table2">Table 2</xref>), the leave-one-stressor-out analyses exhibit more pronounced imbalance, as specific mental stressors are left out during training, resulting in a class distribution of approximately 33.33% MS versus 66.67% nonstress examples (when PASAT or TA stimuli are left out). To overcome the class imbalance, we applied the synthetic minority oversampling technique (SMOTE) [<xref ref-type="bibr" rid="ref76">76</xref>] exclusively to the training set to upsample the minority class, that is, the MS condition. SMOTE is an oversampling method that creates new synthetic instances of the minority class by interpolating between neighboring minority examples. 
To maintain a consistent training pipeline, we also used SMOTE in the main analysis, although it had a negligible impact on model performance in the more balanced MS versus nonstress setting (AUROC with vs without SMOTE: 0.741 vs 0.742 for XGBoost; 0.724 vs 0.724 for LR).</p></sec><sec id="s2-11"><title>Model Output</title><p>We framed our primary outcome as a binary classification task: detecting MS episodes across all nonstress states, including seated BLs, recovery periods, and LPA to MPA. In this way, our nonstress class reflects both desk-bound environments, such as office work in front of a computer, as well as more mobile contexts, such as those found in the teaching or nursing profession. By incorporating both dynamic and seated non-MS conditions, our models must learn to distinguish ECG changes induced by MS from those resulting from resting or physical activity&#x2014;an inherently more complex task than handling either condition alone.</p></sec><sec id="s2-12"><title>Ethical Considerations</title><p>Given that this study represents a retrospective analysis of precollected data, ethical approval was not required under the Dutch Medical Research Involving Human Subjects Act (WMO) [<xref ref-type="bibr" rid="ref77">77</xref>]. However, the study from which the experimental data were obtained [<xref ref-type="bibr" rid="ref20">20</xref>] was approved by the Medical Ethical Committee of the Vrije Universiteit Amsterdam Medical Center (METc VUmc #2017.374, ABR #NL62442.029.17). All participants provided written informed consent before the start of the experiment and were either compensated with research credits (for student participants) or with a &#x20AC;50 (US $57.92) gift voucher. 
All data were pseudonymized.</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title>Exploratory ECG Signal Analysis</title><p>Before evaluating our models, we first examine 2 fundamental ECG biomarkers, namely mean HR and AVNN reactivity, to illustrate the intrinsic challenge of detecting MS in everyday routine activities. Mean HR was selected for its simplicity of calculation from an ECG signal. In contrast, HR reactivity (or its inverse, AVNN reactivity) was chosen for its link to cardiac sympathetic and parasympathetic tone and its reliability in recognizing MS in experimental conditions [<xref ref-type="bibr" rid="ref34">34</xref>]; however, because it relies on a controlled BL measurement, it cannot be defined in unconstrained, fully ambulatory settings.</p><p><xref ref-type="fig" rid="figure1">Figure 1</xref> shows the kernel density estimates of mean HR for five experimental conditions: seated BL, recovery (sitting), low-intensity activity, MS, and moderate-intensity activity. As expected, increasing physical activity leads to a corresponding increase in mean HR. Crucially, the distribution for MS (blue) overlaps extensively with both low- and moderate-intensity activity (green and yellow) and even with the resting peaks (BL and recovery in gray). This pronounced overlap underscores the difficulty of disentangling MS-induced changes from those arising during everyday activity or recovery based on a simple measurement such as HR.</p><p><xref ref-type="fig" rid="figure2">Figure 2</xref> shows the AVNN reactivity, calculated as the difference between AVNN during each experimental stressor and the seated BL, across all MS conditions. 
Although some individuals exhibit positive AVNN reactivity, the group-level response, represented by the mean change from the seated BL, was significantly negative across all tasks, indicating an overall decline in AVNN compared with BL (PASAT: <italic>t</italic><sub>126</sub>=&#x2212;7.66, <italic>P</italic>&#x003C;.001; PASAT [repeat]: <italic>t</italic><sub>126</sub>=&#x2212;12.02, <italic>P</italic>&#x003C;.001; RAVEN: <italic>t</italic><sub>126</sub>=&#x2212;3.19, <italic>P</italic>=.002; SSST: <italic>t</italic><sub>126</sub>=&#x2212;11.92, <italic>P</italic>&#x003C;.001; TA: <italic>t</italic><sub>126</sub>=&#x2212;8.20, <italic>P</italic>&#x003C;.001; TA [repeat]: <italic>t</italic><sub>126</sub>=&#x2212;13.09, <italic>P</italic>&#x003C;.001 for 2-sided paired <italic>t</italic> tests). However, the magnitude of this change varied considerably across experimental conditions. For instance, TA repeat elicits the most considerable mean AVNN reduction, whereas RAVEN matrices produce a more modest decline.</p><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>AVNN interval reactivity in ms across mental stressors. AVNN reactivity was calculated for each participant (127 participants) as the average NN interval during the mental stressor minus the average NN interval during the sitting baseline period (ie, the inverse of the commonly used HR reactivity). Statistical significance was assessed using a paired <italic>t</italic> test. 
Test statistics: PASAT: <italic>t</italic><sub>126</sub>=&#x2013;7.66, <italic>P</italic>&#x003C;.001; PASAT (repeat): <italic>t</italic><sub>126</sub>=&#x2013;12.02, <italic>P</italic>&#x003C;.001; RAVEN: <italic>t</italic><sub>126</sub>=&#x2013;3.19, <italic>P</italic>=.002; SSST: <italic>t</italic><sub>126</sub>=&#x2013;11.92, <italic>P</italic>&#x003C;.001; TA: <italic>t</italic><sub>126</sub>=&#x2013;8.20, <italic>P</italic>&#x003C;.001; TA (repeat): <italic>t</italic><sub>126</sub>=&#x2013;13.09, <italic>P</italic>&#x003C;.001 for 2-sided paired <italic>t</italic> tests. AVNN: average normal-to-normal; HR: heart rate; ms: milliseconds; NN: normal-to-normal; PASAT: paced auditory serial addition task; RAVEN: Raven&#x2019;s Progressive Matrices; SSST: sing-a-song-stress test; TA: tone avoidance.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="jmir_v28i1e80450_fig02.png"/></fig><p>Taken together, these figures demonstrate that neither mean HR nor HR reactivity (or its inverse AVNN reactivity) alone can reliably discriminate MS from everyday activity or account for heterogeneity across stress tasks and individuals. This motivates our extraction of 55 time-, frequency-, nonlinear, and morphological ECG features, and the use of ML models to develop an MS classifier that detects physiological patterns characteristic of MS.</p><p>The large interindividual variability in ECG responses underscores that MS reactions are person-specific, likely influenced by subjective perceptions of stress. In <xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref>, we also show changes in both positive and negative affect relative to the sitting BL. 
As expected, positive affect significantly decreased across all mental stressors (PASAT: <italic>t</italic><sub>126</sub>=&#x2212;13.25, <italic>P</italic>&#x003C;.001; PASAT (repeat): <italic>t</italic><sub>126</sub>=&#x2212;13.45, <italic>P</italic>&#x003C;.001; RAVEN: <italic>t</italic><sub>126</sub>=&#x2212;7.96, <italic>P</italic>&#x003C;.001; SSST: <italic>t</italic><sub>126</sub>=&#x2212;4.65, <italic>P</italic>&#x003C;.001; TA: <italic>t</italic><sub>126</sub>=&#x2212;7.71, <italic>P</italic>&#x003C;.001; TA [repeat]: <italic>t</italic><sub>126</sub>=&#x2212;5.00, <italic>P</italic>&#x003C;.001 for 2-sided paired <italic>t</italic> tests). Likewise, negative affect significantly increased compared with the sitting BL (PASAT: <italic>t</italic><sub>126</sub>=&#x2212;12.00, <italic>P</italic>&#x003C;.001; PASAT [repeat]: <italic>t</italic><sub>126</sub>=&#x2212;6.57, <italic>P</italic>&#x003C;.001; RAVEN: <italic>t</italic><sub>126</sub>=&#x2212;6.09, <italic>P</italic>&#x003C;.001; SSST: <italic>t</italic><sub>126</sub>=&#x2212;6.67, <italic>P</italic>&#x003C;.001; TA: <italic>t</italic><sub>126</sub>=&#x2212;8.31, <italic>P</italic>&#x003C;.001; TA [repeat]: <italic>t</italic><sub>126</sub>=&#x2212;3.21, <italic>P</italic>=.002 for 2-sided paired <italic>t</italic> tests). Nonetheless, apparent individual differences emerged, highlighting the challenge of building models that generalize across both stressors and individuals.</p></sec><sec id="s3-2"><title>Classification Performance</title><sec id="s3-2-1"><title>Model Performance Across Sampling Rates</title><p><xref ref-type="fig" rid="figure3">Figure 3</xref> compares LR and XGBoost on our MS detection task at 1000 Hz and after downsampling to 500, 250, and 125 Hz. Both classifiers achieve acceptable AUROC scores, with XGBoost outperforming the LR counterpart across all frequencies (XGBoost: 0.741, 95% CI 0.701&#x2010;0.783; LR: 0.724, 95% CI 0.678&#x2010;0.772) at 1000 Hz. 
Interestingly, AUROC performance remains stable across all sampling rates for both models, suggesting that sampling at higher rates does not yield performance gains. In <xref ref-type="supplementary-material" rid="app4">Multimedia Appendix 4</xref>, we report corresponding AUPRC scores, which show similar trends and qualitative results. To estimate the performance difference between XGBoost and LR, we computed the mean pairwise differences in AUROC and AUPRC using 2000 participant-level cluster bootstrap samples, alongside 95% and 99% CIs to quantify uncertainty around these estimates. The results are reported in <xref ref-type="supplementary-material" rid="app5">Multimedia Appendix 5</xref>. We chose the bootstrap approach over the DeLong test [<xref ref-type="bibr" rid="ref78">78</xref>] because the latter assumes independent samples [<xref ref-type="bibr" rid="ref79">79</xref>], which is violated in our setup due to within-participant correlation among the time windows. As shown in <xref ref-type="supplementary-material" rid="app5">Multimedia Appendix 5</xref>, the mean performance difference (<italic>&#x0394;</italic>) between XGBoost and LR was 0.017 for AUROC (95% CI 0.001&#x2010;0.032) and 0.015 for AUPRC (95% CI &#x2212;0.007 to 0.037), using 2000 bootstrap samples. These differences are small in magnitude (&#x003C;0.02), suggesting that both models perform comparably across sampling rates. At the <italic>F</italic><sub>1</sub>-maximizing threshold (1000 Hz), XGBoost achieved a sensitivity of 0.800 (95% CI 0.698&#x2010;0.889) but a specificity of only 0.512 (95% CI 0.420&#x2010;0.609); LR performed similarly (sensitivity: 0.782, 95% CI 0.671&#x2010;0.878; specificity: 0.509, 95% CI 0.417&#x2010;0.613). This asymmetry indicates that, although both models reliably detect MS, approximately half of the nonstress segments are misclassified, highlighting a critical challenge for deployment. 
Complete results across all sampling rates, along with <italic>F</italic><sub>1</sub>-scores and RF results, are provided in <xref ref-type="supplementary-material" rid="app6">Multimedia Appendix 6</xref>.</p><fig position="float" id="figure3"><label>Figure 3.</label><caption><p>Performance comparison of logistic regression and extreme gradient boosting for electrocardiogram-based mental stress classification across sampling rates (127 total participants, 26 test set participants). Points represent bootstrapped mean AUROC with 95% CIs (error bars) from 2000 participant-level bootstrap samples. Models were trained on 55 features extracted from 30-second windows (10-second shift) using 60/20/20 (train/validation/test) splits at the individual level. Both models demonstrate robustness to downsampling from 1000 to 125 Hz. AUROC: area under the receiver operating characteristic curve.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="jmir_v28i1e80450_fig03.png"/></fig></sec><sec id="s3-2-2"><title>Model Performance Under Simplified Conditions</title><p>To assess the impact of our inherently more challenging classification setup, we retrained our ML models on a simplified task: distinguishing MS from a seated BL. The corresponding results are reported in <xref ref-type="supplementary-material" rid="app7">Multimedia Appendix 7</xref>. As can be seen, model performances improve substantially under these simplified settings: bootstrapped mean AUROC for XGBoost increases from 0.741 to 0.766, and from 0.724 to 0.752 for the LR at a sampling rate of 1000 Hz (with similar increases for the other sampling rates). 
The corresponding AUPRC improvements are even more substantial, underlining the challenge our setup poses for MS detection (especially without reliance on accelerometers).</p><p>As physiological recovery from stressors is not instantaneous, the recovery periods following each experimental condition (<xref ref-type="table" rid="table1">Table 1</xref>) likely contain residual autonomic arousal. To investigate the impact of this on model performance, we conducted a sensitivity analysis excluding recovery periods from the nonstress class. The AUROC performance remained stable (LR: 0.728, 95% CI 0.671&#x2010;0.787; XGBoost: 0.751, 95% CI 0.699&#x2010;0.806 at sampling rate of 1000 Hz), whereas the AUPRC improved substantially for both models (LR: 0.825, 95% CI 0.782&#x2010;0.867; XGBoost: 0.848, 95% CI 0.811&#x2010;0.885), suggesting that including recovery periods introduced label noise negatively affecting AUPRC, likely due to residual autonomic arousal from preceding stressors. However, because distinguishing stress from poststress recovery is an important and realistic challenge in detecting MS in everyday life, we retained recovery periods in the main analysis.</p></sec><sec id="s3-2-3"><title>Time-Window Robustness</title><p>In <xref ref-type="supplementary-material" rid="app8">Multimedia Appendix 8</xref>, we compare AUROC and AUPRC scores for 30-second versus 60-second windows for both models. Extending the time window yields consistent performance gains for both LR and XGBoost across all frequencies, highlighting the trade-off between quick stress detection (short time windows) and improved model performance (longer windows). 
For instance, at a 1000 Hz sampling rate, doubling the window from 30 to 60 seconds increased XGBoost&#x2019;s bootstrapped mean AUROC from 0.741 to 0.759 (and AUPRC from 0.706 to 0.742), while LR&#x2019;s mean AUROC rose from 0.724 to 0.747 (AUPRC from 0.691 to 0.730).</p></sec><sec id="s3-2-4"><title>Instance-Level Agreement</title><p>We further assessed Cohen kappa regarding the model agreement on individual test instances. The substantial agreement of 0.735 (95% CI 0.695&#x2010;0.775) suggests that LR and XGBoost largely agree, at the instance level, on which ECG responses reflect MS.</p></sec></sec><sec id="s3-3"><title>Model Explainability and Model Calibration</title><p>We next examined feature importance and model calibration, that is, how well the predicted probabilities represent the true probability of the outcomes. <xref ref-type="fig" rid="figure4">Figure 4</xref> shows the 10 most important predictors for mental-stress detection in the XGBoost model according to the SHAP values. Notably, 5 out of these features originate from the frequency domain, highlighting the importance of spectral features in stress detection. Two heart-rate fragmentation indices (PSS and IALS) and fuzzy entropy also rank highly, with higher values of these nonlinear features pushing XGBoost&#x2019;s output towards the MS class.</p><p>Interestingly, maximum HR and NN20, 2 simple time-domain features, are also important for predicting MS, demonstrating the relevance of time-domain metrics.</p><p>In <xref ref-type="supplementary-material" rid="app9">Multimedia Appendix 9</xref>, we present the corresponding SHAP values for the LR model, along with its feature rankings based on the absolute values of the model coefficients. 
Fuzzy entropy emerges as the most important predictor of MS, underscoring its significance.</p><p>IALS again appears to be a crucial feature for MS detection, thereby confirming the importance of heart-rate fragmentation indices and underscoring the results obtained with the XGBoost model. Among time and frequency domain features, total and maximum power in the HF band, SD of HR, and PNN20 are also of high importance for the LR model.</p><fig position="float" id="figure4"><label>Figure 4.</label><caption><p>SHAP plot for the detection of mental stress versus no-mental-stress for the extreme gradient boosting model. The top 10 most important features are presented in descending order of importance. Each dot represents a Shapley value for a specific instance and feature, with the color indicating the underlying feature value ranging from high (red) to low (blue). HF: high frequency (0.15-0.40 Hz); HR: heart rate; IALS: inverse average length of segments; NN20: number of (normal-to-normal) NN intervals differing by more than 20 milliseconds; PSS: percentage of short segments; SHAP: Shapley additive explanation; VHF: very high frequency (0.40-0.50 Hz).</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="jmir_v28i1e80450_fig04.png"/></fig><p>The corresponding model-explainability results for the MS versus the seated nonstress condition (<xref ref-type="supplementary-material" rid="app9">Multimedia Appendix 9</xref>; <xref ref-type="fig" rid="figure3">Figures 3</xref><xref ref-type="fig" rid="figure4"/>-<xref ref-type="fig" rid="figure5">5</xref>) also highlight the importance of fuzzy entropy, as well as both the high-frequency and very high-frequency components, as predictive features.</p><p>In the health care domain, accurate probability estimates are crucial for informed decision-making [<xref ref-type="bibr" rid="ref66">66</xref>,<xref ref-type="bibr" rid="ref70">70</xref>]. 
For this reason, we assessed model calibration using the Brier score and the expected calibration error with 10 bins.</p><p>LR achieves a bootstrapped mean Brier score of 0.215 (95% CI 0.199&#x2010;0.232), while XGBoost scored 0.209 (95% CI 0.194&#x2010;0.224), thereby falling just above our &#x003C;0.20 cutoff for well-calibrated probabilities. However, the Brier skill score for the XGBoost model is 0.615 (95% CI 0.586&#x2010;0.643), and 0.603 (95% CI 0.572&#x2010;0.633) for the LR, indicating substantial relative improvements over the dummy classifier BL that predicts the majority class (MS). <xref ref-type="fig" rid="figure5">Figure 5</xref> shows the calibration curves for both classifiers, indicating slight underconfidence at low prediction probabilities (0.05&#x2010;0.25).</p><fig position="float" id="figure5"><label>Figure 5.</label><caption><p>Calibration curves for the logistic regression and extreme gradient boosting models, evaluated on the held-out test set (127 total participants and 26 test participants). Points show the predicted model probabilities for mental stress versus the observed probability of mental stress (y-axis), across 10 bins. The dotted line indicates perfect calibration. ECE quantifies deviation from perfect calibration, with lower values indicating better calibration. ECE: expected calibration error.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="jmir_v28i1e80450_fig05.png"/></fig></sec><sec id="s3-4"><title>Feature Parsimony</title><p>Generating the full set of 55 features is time-consuming and increases memory demands, which are both undesirable for resource-constrained wearable devices. Moreover, a large feature set can obscure model interpretability. To assess the trade-off between model simplicity and predictive performance, <xref ref-type="fig" rid="figure6">Figure 6</xref> reports the AUROC as the number of features is progressively reduced. 
Notably, performance remains surprisingly robust, with over 95% of the original AUROC retained for both models, even with only 10 features. Interestingly, XGBoost outperforms the LR model across all feature sets. In <xref ref-type="supplementary-material" rid="app10">Multimedia Appendix 10</xref>, we report the corresponding results for the AUPRC score, which are in line with the previously stated insights. The full list of selected features for each feature set is provided in <xref ref-type="supplementary-material" rid="app11">Multimedia Appendix 11</xref>. As noted in the Methods section, feature selection was conducted using a single validation split. The resulting feature rankings should therefore be interpreted with caution.</p><fig position="float" id="figure6"><label>Figure 6.</label><caption><p>Performance comparison of logistic regression and extreme gradient boosting for electrocardiogram-based mental stress classification across varying feature-set sizes on the held-out test set (127 total participants and 26 test participants). Points represent bootstrapped mean AUROC with 95% CIs (error bars) from 2000 participant-level bootstrap samples. Models were trained using a 60/20/20 (train/validation/test) split at the individual level, with features selected via forward selection based on validation performance. Both models demonstrate robustness to feature reduction, maintaining &#x003E;95% of original performance with only 10 features (logistic regression: 0.707; extreme gradient boosting: 0.729) compared with the full 55-feature set (logistic regression: 0.724; extreme gradient boosting: 0.741). 
AUROC: area under the receiver operating characteristic curve.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="jmir_v28i1e80450_fig06.png"/></fig></sec><sec id="s3-5"><title>Generalization to Unknown Stressors</title><p>Everyday life exposes individuals to a wide variety of mental stressors, making it practically infeasible to train a model on every possible type of stimulus. Thus, a model&#x2019;s ability to perform well under unseen mental stressors is essential for real-world deployment [<xref ref-type="bibr" rid="ref17">17</xref>]. <xref ref-type="table" rid="table3">Table 3</xref> reports the AUROC for each model under a leave-one-stressor-out scheme. For each held-out stressor, the model is trained on all remaining stressors plus the nonstress condition and then tested on its ability to discriminate the held-out stressor from the nonstress condition. Note that if a mental stressor had a repeat condition in the experimental protocol (eg, TA and PASAT), both stressors were left out during training of the ML models. To ensure these results accurately reflect the difficulty of generalizing to an unseen stressor, rather than just the effect of having fewer stressor types and training examples, we also report, for each left-out mental stressor, the BL AUROC on the remaining (known) stressors.</p><table-wrap id="t3" position="float"><label>Table 3.</label><caption><p>Bootstrapped mean area under the receiver operating characteristic curve (AUROC) with 95% CIs from 2000 participant-level bootstrap samples for binary classification of mental stress versus nonstress conditions under leave-one-stressor-out evaluation on the held-out test set (127 total participants and 26 test participants). Models were trained using a 60/20/20 (train/validation/test) split at the participant level, using a total of 55 features. 
For each row, the &#x201C;baseline&#x201D; column reports the AUROC performance of the model on stressors included in the training set. In contrast, &#x201C;left-out&#x201D; reports the AUROC model performance on the held-out (unseen) mental stressor, assessing each model&#x2019;s ability to generalize to novel mental stress conditions. Note that if a mental stressor had a repeat condition in the experimental protocol (eg, tone avoidance and paced auditory serial addition task), both stressors were left out during training of the machine learning models, which is why their baseline scores are identical.</p></caption><table id="table3" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Left-out mental stressor</td><td align="left" valign="bottom" colspan="6">AUROC model performance, 95% CI</td></tr><tr><td align="left" valign="bottom"/><td align="left" valign="bottom">LR<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup> baseline</td><td align="left" valign="bottom">LR left-out</td><td align="left" valign="bottom">XGBoost<sup><xref ref-type="table-fn" rid="table3fn2">b</xref></sup> baseline</td><td align="left" valign="bottom">XGBoost left-out</td><td align="left" valign="bottom">RF<sup><xref ref-type="table-fn" rid="table3fn3">c</xref></sup> baseline</td><td align="left" valign="bottom">RF left-out</td></tr></thead><tbody><tr><td align="left" valign="top">SSST<sup><xref ref-type="table-fn" rid="table3fn4">d</xref></sup></td><td align="char" char="." valign="top">0.7218 (0.6812&#x2010;0.7660)</td><td align="char" char="." valign="top">0.5620 (0.4667&#x2010;0.6548)</td><td align="char" char="." valign="top">0.7360 (0.6975&#x2010;0.7759)</td><td align="char" char="." valign="top">0.5600 (0.4795&#x2010;0.6380)</td><td align="char" char="." valign="top">0.7413 (0.7039&#x2010;0.7792)</td><td align="char" char="." 
valign="top">0.5499 (0.4680&#x2010;0.6261)</td></tr><tr><td align="left" valign="top">RAVEN<sup><xref ref-type="table-fn" rid="table3fn5">e</xref></sup></td><td align="char" char="." valign="top">0.7146 (0.6789&#x2010;0.7520)</td><td align="char" char="." valign="top">0.6365 (0.5736&#x2010;0.6976)</td><td align="char" char="." valign="top">0.7365 (0.7070&#x2010;0.7671)</td><td align="char" char="." valign="top">0.6595 (0.6057&#x2010;0.7114)</td><td align="char" char="." valign="top">0.7371 (0.7061&#x2010;0.7696)</td><td align="char" char="." valign="top">0.6645 (0.6092&#x2010;0.7173)</td></tr><tr><td align="left" valign="top">PASAT<sup><xref ref-type="table-fn" rid="table3fn6">f</xref></sup></td><td align="char" char="." valign="top">0.7219 (0.6777&#x2010;0.7659)</td><td align="char" char="." valign="top">0.7035 (0.6390&#x2010;0.7673)</td><td align="char" char="." valign="top">0.7322 (0.6888&#x2010;0.7752)</td><td align="char" char="." valign="top">0.7125 (0.6490&#x2010;0.7807)</td><td align="char" char="." valign="top">0.7333 (0.6906&#x2010;0.7754)</td><td align="char" char="." valign="top">0.7150 (0.6520&#x2010;0.7816)</td></tr><tr><td align="left" valign="top">PASAT (repeat)</td><td align="char" char="." valign="top">0.7219 (0.6777&#x2010;0.7659)</td><td align="char" char="." valign="top">0.6570 (0.5711&#x2010;0.7456)</td><td align="char" char="." valign="top">0.7322 (0.6888&#x2010;0.7752)</td><td align="char" char="." valign="top">0.6691 (0.5897&#x2010;0.7518)</td><td align="char" char="." valign="top">0.7333 (0.6906&#x2010;0.7754)</td><td align="char" char="." valign="top">0.6782 (0.5985&#x2010;0.7593)</td></tr><tr><td align="left" valign="top">TA<sup><xref ref-type="table-fn" rid="table3fn7">g</xref></sup></td><td align="char" char="." valign="top">0.7123 (0.6705&#x2010;0.7558)</td><td align="char" char="." valign="top">0.7731 (0.7070&#x2010;0.8352)</td><td align="char" char="." valign="top">0.7107 (0.6710&#x2010;0.7528)</td><td align="char" char="." 
valign="top">0.7594 (0.6965&#x2010;0.8192)</td><td align="char" char="." valign="top">0.7087 (0.6713&#x2010;0.7492)</td><td align="char" char="." valign="top">0.7608 (0.7017&#x2010;0.8205)</td></tr><tr><td align="left" valign="top">TA (repeat)</td><td align="char" char="." valign="top">0.7123 (0.6705&#x2010;0.7558)</td><td align="char" char="." valign="top">0.7337 (0.6488&#x2010;0.8127)</td><td align="char" char="." valign="top">0.7107 (0.6710&#x2010;0.7528)</td><td align="char" char="." valign="top">0.7200 (0.6454&#x2010;0.7936)</td><td align="char" char="." valign="top">0.7087 (0.6713&#x2010;0.7492)</td><td align="char" char="." valign="top">0.7618 (0.6834&#x2010;0.8370)</td></tr></tbody></table><table-wrap-foot><fn id="table3fn1"><p><sup>a</sup>LR: logistic regression.</p></fn><fn id="table3fn2"><p><sup>b</sup>XGBoost: extreme gradient boosting.</p></fn><fn id="table3fn3"><p><sup>c</sup>RF: random forest.</p></fn><fn id="table3fn4"><p><sup>d</sup>SSST: sing-a-song-stress test.</p></fn><fn id="table3fn5"><p><sup>e</sup>RAVEN: Raven&#x2019;s Progressive Matrices.</p></fn><fn id="table3fn6"><p><sup>f</sup>PASAT: paced auditory serial addition task.</p></fn><fn id="table3fn7"><p><sup>g</sup>TA: tone avoidance.</p></fn></table-wrap-foot></table-wrap><p>As shown in <xref ref-type="table" rid="table3">Table 3</xref>, model performance on held-out stressors generally declines compared with stressors included in the training set, as expected. Nevertheless, all models achieve AUROC well above the 0.50 chance level on most unseen stressors, confirming their ability to generalize to new mental stimuli. The exception was SSST, for which the 95% CI overlaps chance performance. Notably, the magnitude of performance drop to unseen mental stressors varies across the different stimuli. While both the SSST and RAVEN are most difficult to classify correctly as MS, the opposite holds true for the TA condition. 
The fact that LR, XGBoost, and the RF model show comparable generalization capability suggests that differences in generalizability to novel stressors arise from the underlying ECG response, rather than from model complexity.</p><p><xref ref-type="supplementary-material" rid="app12">Multimedia Appendix 12</xref> presents AUPRC results that closely mirror the AUROC findings. SSST and RAVEN are the hardest to detect when excluded from model training. Although model performance is generally worse for the left-out stressor, the TA stressor (and its repeat condition) shows the opposite pattern: the AUPRC score when TA is held out exceeds that of other mental stressors included in training. This suggests that TA produces particularly distinctive and readily identifiable stress patterns.</p></sec><sec id="s3-6"><title>Stratified Stressor Analysis</title><p>While our leave-one-stressor-out evaluation results suggest that our ML models can overall generalize to novel mental stressors, the generalizability to unseen mental stressors varied significantly. In particular, the sharp contrast between TA and SSST, 2 mental stressors that elicit strong physiological responses as measured by AVNN reactivity (<xref ref-type="fig" rid="figure2">Figure 2</xref>), is intriguing. We therefore examined how each model performed on individual stressor types. <xref ref-type="table" rid="table4">Table 4</xref> reports AUROC scores stratified by mental-stressor condition. Again, model performance varies by mental stressor: TA achieves AUROC&#x003E;0.75, while others (eg, RAVEN and SSST) yield only moderate performance, which suggests that certain MS stimuli produce more distinguishable ECG responses (as compared with the nonstress class) than others. Importantly, this observation holds for LR, XGBoost, and RF alike, demonstrating that these differences are not related to model complexity. 
The results for the AUPRC, as presented in <xref ref-type="supplementary-material" rid="app13">Multimedia Appendix 13</xref>, align with these findings.</p><table-wrap id="t4" position="float"><label>Table 4.</label><caption><p>Bootstrapped mean area under the receiver operating characteristic curve (AUROC) with 95% CIs from 2000 participant-level bootstrap samples for mental stress classification, stratified by individual stressor type and evaluated on the held-out test set (26 participants from 127 total participants). Results are shown for logistic regression (LR), extreme gradient boosting (XGBoost), and random forest (RF) trained using 60/20/20 (train/validation/test) splits at the participant level with 55 features. Each row presents model performance for a specific mental stressor, assessing how well the models detect that stressor. An AUROC score of 0.50 represents chance-level performance (ie, a random baseline that predicts the majority class).</p></caption><table id="table4" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Mental stressor</td><td align="left" valign="bottom" colspan="3">AUROC model performance, 95% CI</td></tr><tr><td align="left" valign="bottom"/><td align="left" valign="bottom">LR</td><td align="left" valign="bottom">XGBoost</td><td align="left" valign="bottom">RF</td></tr></thead><tbody><tr><td align="left" valign="top">SSST<sup><xref ref-type="table-fn" rid="table4fn1">a</xref></sup></td><td align="char" char="." valign="top">0.5902 (0.4955&#x2010;0.6792)</td><td align="char" char="." valign="top">0.5726 (0.4958&#x2010;0.6464)</td><td align="char" char="." valign="top">0.5690 (0.4926&#x2010;0.6421)</td></tr><tr><td align="left" valign="top">RAVEN<sup><xref ref-type="table-fn" rid="table4fn2">b</xref></sup></td><td align="char" char="." valign="top">0.6657 (0.6090&#x2010;0.7230)</td><td align="char" char="." valign="top">0.6829 (0.6350&#x2010;0.7312)</td><td align="char" char="." 
valign="top">0.6893 (0.6429&#x2010;0.7348)</td></tr><tr><td align="left" valign="top">PASAT<sup><xref ref-type="table-fn" rid="table4fn3">c</xref></sup></td><td align="char" char="." valign="top">0.7087 (0.6435&#x2010;0.7745)</td><td align="char" char="." valign="top">0.7231 (0.6607&#x2010;0.7882)</td><td align="char" char="." valign="top">0.7219 (0.6588&#x2010;0.7871)</td></tr><tr><td align="left" valign="top">PASAT (repeat)</td><td align="char" char="." valign="top">0.6667 (0.5809&#x2010;0.7558)</td><td align="char" char="." valign="top">0.6846 (0.6063&#x2010;0.7637)</td><td align="char" char="." valign="top">0.6900 (0.6099&#x2010;0.7871)</td></tr><tr><td align="left" valign="top">TA<sup><xref ref-type="table-fn" rid="table4fn4">d</xref></sup></td><td align="char" char="." valign="top">0.7998 (0.7461&#x2010;0.8540)</td><td align="char" char="." valign="top">0.8176 (0.7729&#x2010;0.8603)</td><td align="char" char="." valign="top">0.8164 (0.7692&#x2010;0.8618)</td></tr><tr><td align="left" valign="top">TA (repeat)</td><td align="char" char="." valign="top">0.7737 (0.6960&#x2010;0.8446)</td><td align="char" char="." valign="top">0.7984 (0.7290&#x2010;0.8614)</td><td align="char" char="." valign="top">0.7962 (0.7262&#x2010;0.8635)</td></tr></tbody></table><table-wrap-foot><fn id="table4fn1"><p><sup>a</sup>SSST: sing-a-song-stress test.</p></fn><fn id="table4fn2"><p><sup>b</sup>RAVEN: Raven&#x2019;s Progressive Matrices.</p></fn><fn id="table4fn3"><p><sup>c</sup>PASAT: paced auditory serial addition task.</p></fn><fn id="table4fn4"><p><sup>d</sup>TA: tone avoidance.</p></fn></table-wrap-foot></table-wrap><p>While the relatively poor model performance on the RAVEN stressor likely stems from its weaker elicited physiological responses (AVNN reactivity is displayed in <xref ref-type="fig" rid="figure2">Figure 2</xref>), the similarly low performance on SSST is more surprising. 
One possible explanation is that SSST elicits a fundamentally different physiological response compared with the other mental stressors, which could account for the poor generalization of the ML models to the SSST stressor observed in <xref ref-type="table" rid="table3">Table 3</xref>. To test this hypothesis, we retrained our LR and XGBoost classifiers separately on each stressor, that is, training and testing exclusively on one condition at a time. <xref ref-type="supplementary-material" rid="app14">Multimedia Appendix 14</xref> summarizes these per-stressor results, showing that both models outperform random BLs across all mental stressors. Notably, SSST shows marked improvement: despite both models struggling to detect it in earlier analyses, it now achieves an AUROC of 0.73 for LR and 0.76 for XGBoost, comparable to other mental stressors. This indicates that the SSST task elicits detectable MS responses. However, when SSST is excluded during training, AUROC drops sharply when tested on it (<xref ref-type="table" rid="table3">Table 3</xref>). Together, this indicates that while our ML models can learn to discriminate between SSST and nonstress when trained on SSST stressors alone, the patterns that the ML models learn from other mental stressors do not generalize to SSST. Interestingly, a similar pattern is also observed for the RAVEN stressor, further supporting this interpretation.</p></sec><sec id="s3-7"><title>Discrimination From Physical Activity</title><p>Given that MS can elicit ECG responses similar to those induced by physical activity (<xref ref-type="fig" rid="figure1">Figure 1</xref>), our ML models may conflate metabolic demand with MS. We therefore examined condition-specific sensitivity and specificity for light physical activity (LPA) and MPA (at a sampling rate of 1000 Hz, using 55 features).</p><p>The XGBoost model achieved good sensitivity for MS (0.800; 95% CI 0.698&#x2010;0.889), with the LR also yielding acceptable performance (0.772; 95% CI 0.656&#x2010;0.877). 
For the LPA, XGBoost achieved a specificity of 0.787 (95% CI 0.662&#x2010;0.903), whereas LR performed comparably, with a bootstrapped mean specificity of 0.794 (95% CI 0.685&#x2010;0.895). However, our models performed worse for MPA (LR: 0.444, 95% CI 0.314&#x2010;0.576; XGBoost: 0.418, 95% CI 0.299&#x2010;0.542), suggesting that even with a comprehensive feature set of 55 features, distinguishing stress-induced ECG responses from those elicited by MPA remains challenging.</p></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Principal Results</title><p>We developed ML models to distinguish MS from a composite nonstress BL encompassing various everyday activities (eg, walking at a normal pace, recovering from MS, or vacuum cleaning). Using 55 features derived from the ECG signal, XGBoost achieved an AUROC of 0.741 and AUPRC of 0.706, while LR performed comparably (AUROC: 0.724; AUPRC: 0.692) at 1000 Hz. Both models achieved high sensitivity (XGBoost: 0.800; LR: 0.782) but low specificity (XGBoost: 0.512; LR: 0.509), with this pattern being particularly pronounced for MPA. Our results, therefore, highlight a fundamental challenge in distinguishing stress-induced cardiac responses from those driven by physical exertion when relying solely on ECG features. The ML models used in our study also demonstrated robustness to ECG signal downsampling, retaining more than 93% of performance using only 10 features.</p><p>While the nonlinear model (XGBoost) numerically outperformed our linear model (LR), the performance differences were small (&#x003C;0.02 difference in AUROC and AUPRC at 1000 Hz), suggesting that complex nonlinear models offer only marginal benefits relative to linear BLs. 
One potential explanation is the so-called Rashomon effect [<xref ref-type="bibr" rid="ref80">80</xref>], which posits that there exist many equally well-performing models for specific datasets&#x2014;a phenomenon often observed in high-stakes applications and tabular datasets [<xref ref-type="bibr" rid="ref81">81</xref>]. High outcome variability, likely present in our dataset, as physiological responses to mental stressors are both participant- and stressor-specific (<xref ref-type="fig" rid="figure2">Figure 2</xref> displays the HRV response), increases the likelihood that a simple model will perform on par with a more complex one [<xref ref-type="bibr" rid="ref82">82</xref>]. The fact that we do not apply time-series models to the raw ECG dataset using, for instance, deep learning approaches and instead work with tabular data also likely further contributes to this phenomenon.</p><p>Our findings reveal a clear trade-off between window size and model performance, as increasing the window size led to model improvements. This can be explained by more reliable features, as shortening the window size has been shown to affect the quality of HRV features [<xref ref-type="bibr" rid="ref25">25</xref>]. Deep learning approaches, such as convolutional or recurrent neural networks, can partially overcome this trade-off, as they learn feature representations directly from the ECG signals and have been shown to achieve promising performance, even using shorter window sizes than those considered in our study [<xref ref-type="bibr" rid="ref83">83</xref>,<xref ref-type="bibr" rid="ref84">84</xref>]. However, these models are often considered &#x201C;black boxes&#x201D; and typically require more data for training than the ML models considered in our study. 
Although recent advances in time-series explainability methods, such as those by Crabb&#x00E9; and Schaar [<xref ref-type="bibr" rid="ref85">85</xref>] and Enguehard [<xref ref-type="bibr" rid="ref86">86</xref>], have been proposed to overcome the challenge of interpreting deep learning time-series models, these approaches typically highlight only key segments in the time-series signal driving the model prediction. However, interpreting raw time series is challenging for humans. In contrast, our ML models use features based on well-studied physiological constructs, thereby facilitating the interpretation of model predictions using methods such as SHAP.</p><p>The feature importance analysis revealed the importance of fuzzy entropy for discriminating MS responses from other nonstress states. This finding aligns with prior work, which indicates that entropy-based measures, capturing the irregularity of the ECG signal, increase during periods of MS [<xref ref-type="bibr" rid="ref87">87</xref>]. Notably, 2 HR fragmentation indices, PSS and IALS, also emerged as important features, despite being less commonly used in ML models for detecting MS. Our findings further highlight the importance of frequency-based features, consistent with a comprehensive review [<xref ref-type="bibr" rid="ref28">28</xref>] covering 37 studies. In that review, the authors identified changes in the low- and high-frequency components of the ECG signal as the most reported marker of MS.</p><p>Additionally, our analysis identified the VHF (as opposed to the UHF) band as an important predictor. Although the physiological origin of the VHF band is less well understood than that of the LF or HF bands, its predictive value likely reflects stress-induced hyperventilation, a well-documented physiological response to various stressors [<xref ref-type="bibr" rid="ref35">35</xref>-<xref ref-type="bibr" rid="ref37">37</xref>]. 
While the VHF band remained an important feature for distinguishing MS from a seated BL (<xref ref-type="supplementary-material" rid="app9">Multimedia Appendix 9</xref>; <xref ref-type="fig" rid="figure3">Figures 3</xref><xref ref-type="fig" rid="figure4"/>-<xref ref-type="fig" rid="figure5">5</xref>), suggesting a physiological contribution under sedentary conditions, we acknowledge that this band might also represent motion artifacts or electromyogram noise during active conditions. The low specificity observed during MPA may partially reflect this confound: If VHF power increases during physical activity, due to motion artifacts rather than vagal modulation, the model may misclassify physical exertion as MS. Further research on this band under stress is needed, particularly using accelerometry or respiratory measurements to distinguish stress-related changes from motion artifacts.</p><p>Our leave-one-stressor-out experiments demonstrated that our ML models could generalize to most held-out stressors, though performance varied by stressor. While SSST and RAVEN proved to be the most difficult stressors to recognize, the opposite was observed for TA: both models detected MS evoked by TA and its repeat condition, even when they had never been trained on it. This suggests that the TA stressor produces a strong and distinct physiological ECG response that can be easily distinguished from nonstress states. Whether this is related to differential psychological processes engaged by these tasks is an open question. The TA evokes frustration and a fear of punishment, whereas the SSST is social-evaluative and invokes a fear of exclusion, similar to RAVEN and mental arithmetic tasks, which also involve ego threats. 
Building on this, our findings imply that robust, generalizable models require training on a diverse set of distinct mental stressors or, when constrained, focusing on mental stressors that evoke a strong physiological response, such as TA.</p><p>Although our results demonstrate that the ML models can detect MS with high sensitivity, distinguishing stress-evoked ECG responses from those induced by MPA remains especially challenging (specificity LR: 0.444; XGBoost: 0.418), even with a comprehensive set of 55 handcrafted features. Because deep learning models can learn more nuanced temporal ECG patterns, a possible path forward is to apply time-series deep learning directly to raw ECG data. Moreover, incorporating additional sensing modalities (eg, accelerometer or electrodermal activity) may help the model separate cardiac responses driven by metabolic demand from those driven by psychological stress. We therefore consider deep learning&#x2013;based multimodal approaches an important direction for future research.</p><p>Importantly, our results suggest potential for translation to photoplethysmography (PPG)-based settings, as the most predictive features in our ML models are derived from beat-to-beat dynamics, specifically, the R-peaks and interbeat intervals, rather than ECG waveform morphology (eg, T-wave amplitude). Notably, the XGBoost model maintains most of its performance with 10 features (AUROC: 0.729) or 5 features (AUROC: 0.716; <xref ref-type="fig" rid="figure6">Figure 6</xref>). These feature subsets consist of PPG-compatible features (HR statistics and frequency-domain features; <xref ref-type="supplementary-material" rid="app11">Multimedia Appendix 11</xref>), thereby supporting the possibility that our results may generalize to PPG signals. 
Consistent with this potential, Taoum&#x202F;et&#x202F;al [<xref ref-type="bibr" rid="ref88">88</xref>] demonstrated that HRV metrics derived from PPG agreed with the ECG counterpart for 37 out of 48 features for recordings of 5 minutes and 30 seconds, and further for 16 features in 1-minute and 30-second windows. However, since we train on 30-second ECG epochs, future work must determine whether PPG devices can reliably extract those intervals in similarly short windows. Encouragingly, our models remain robust even when the ECG is downsampled to 125 Hz, supporting deployment on lower-resolution PPG wearables. Yet, PPG signals are more susceptible to motion artifacts than research-grade ECG [<xref ref-type="bibr" rid="ref89">89</xref>], which may challenge the direct transfer of our feature pipeline. Given these considerations, validating and adapting our models for real-world PPG recordings, particularly under movement conditions and within 30-second windows, is therefore an important direction for future research and essential before our approach can be deployed on PPG-based wearables.</p></sec><sec id="s4-2"><title>Comparison With Prior Work</title><sec id="s4-2-1"><title>Dataset and Experimental Paradigms</title><p>MS detection has gained significant traction in recent years (Gedam and Paul [<xref ref-type="bibr" rid="ref90">90</xref>] and Sharma and Gedeon [<xref ref-type="bibr" rid="ref91">91</xref>] provide comprehensive reviews, Can et al [<xref ref-type="bibr" rid="ref92">92</xref>] review stress detection in daily life scenarios, and Pataca et al [<xref ref-type="bibr" rid="ref93">93</xref>] review wearable-based approaches), driven in part by the collection of large, multimodal datasets, some of which are publicly available, such as SWELL knowledge work [<xref ref-type="bibr" rid="ref14">14</xref>], wearable stress and affect detection [<xref ref-type="bibr" rid="ref13">13</xref>], StressID [<xref ref-type="bibr" rid="ref61">61</xref>], or 
ForDigitStress [<xref ref-type="bibr" rid="ref62">62</xref>]. While these datasets vary widely in modality, stressor type, and experimental setup, the majority rely on laboratory settings, inducing stress using the Trier Social Stress Test [<xref ref-type="bibr" rid="ref94">94</xref>], variants of the Stroop task [<xref ref-type="bibr" rid="ref95">95</xref>], or mental arithmetic challenges [<xref ref-type="bibr" rid="ref63">63</xref>], all of which are effective methods for inducing acute stress [<xref ref-type="bibr" rid="ref96">96</xref>].</p><p>Even though these datasets have significantly advanced the field, our experimental protocol and its associated dataset offer 2 distinct advantages compared with prior work.</p><p>First, our dataset includes 127 participants, which exceeds the sample sizes of most comparable ECG-based stress datasets [<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref61">61</xref>]. Our relatively large sample size helps to enhance the robustness and generalizability of our findings, especially given the high between-individual variability in physiological stress responses [<xref ref-type="bibr" rid="ref26">26</xref>,<xref ref-type="bibr" rid="ref63">63</xref>,<xref ref-type="bibr" rid="ref97">97</xref>] (also present in our study; see <xref ref-type="fig" rid="figure2">Figure 2</xref>). The small number of participants has also been noted as a limitation of prior work in a systematic review [<xref ref-type="bibr" rid="ref98">98</xref>]. Notably, Smets et al [<xref ref-type="bibr" rid="ref99">99</xref>] include 1002 participants; however, their study relies on an ambulatory design that uses self-reported stress via ecological momentary assessment. 
In contrast, our controlled laboratory protocol induces MS through standardized tasks and captures objective ECG measurements, which can be continuously observed.</p><p>Second, our experimental design incorporates 4 distinct mental stressors&#x2014;2 of which are repeated&#x2014;as well as a diverse set of nonstress conditions, including seated BL, recovery, or common everyday activities such as walking at a normal pace. This comprehensive experimental setup allows us to address a more ecologically valid and challenging classification problem: evaluating whether mental-stress-specific ECG patterns can be distinguished not only from rest, but also from other non-resting states.</p></sec><sec id="s4-2-2"><title>ML Performance and Analysis</title><p>Most ECG-based stress-detection studies frame the problem as distinguishing MS from a seated, nonstress BL [<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref61">61</xref>,<xref ref-type="bibr" rid="ref62">62</xref>,<xref ref-type="bibr" rid="ref100">100</xref>]. In contrast, Sun&#x202F;et&#x202F;al [<xref ref-type="bibr" rid="ref63">63</xref>] achieved 80.9% between-individual accuracy in 20 participants by using an activity-aware, multimodal classifier, therefore combining ECG, accelerometer, and galvanic skin response to detect MS while sitting, standing, and walking. When the accelerometer input was removed, their performance dropped substantially, highlighting the importance of tracking the motion context. Although our ECG-only model does not match the multimodal performance reported by Sun et al [<xref ref-type="bibr" rid="ref63">63</xref>], it is worth noting that their approach relies on a 60-second window. In contrast, our study uses a 30-second window, which, as stated above, reduces model performance but enables faster detection. 
Moreover, our laboratory setup encompasses a broader range of mental stressors and leverages an expanded ECG feature set, highlighting the potential of feature-rich, single-sensor models.</p><p>Previous work has often simplified stress detection to distinguishing MS from a seated BL [<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref61">61</xref>]. In contrast, our classification task differentiates stress from a composite set of nonstressful everyday activities (eg, walking at a normal pace, poststress recovery, and vacuum cleaning), yielding a more ecologically valid scenario in which people naturally move and recover throughout their day. However, this setup makes our classification task more challenging, and consequently, performance is lower than in studies limited to seated nonstress conditions. When we rerun our models on the classic stress versus seated BL task, AUROC and AUPRC both increase markedly (<xref ref-type="supplementary-material" rid="app12">Multimedia Appendix 12</xref>), confirming that the drop in metrics stems from the more demanding classification problem.</p><p>Beyond the expanded task, our approach also leverages a rich feature space of 55 time-, frequency-, nonlinear, and morphological ECG features, exceeding the number of ECG predictors used in other studies [<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref61">61</xref>,<xref ref-type="bibr" rid="ref101">101</xref>]. We also include and investigate less commonly considered features, such as HR fragmentation indices, which proved to be of added value according to our feature importance analysis (PSS and IALS). To the best of our knowledge, no previous work has evaluated the impact of aggressive downsampling on downstream model performance. We show that reducing the sampling rate from 1000 to 125 Hz does not degrade performance. 
While our models were robust to sampling rates, the choice of the window size affected model performance: increasing the window size from 30 to 60 seconds led to notable performance improvements for both ML models. Consequently, our findings provide valuable insights into the effects of sampling rate and window size choices for MS detection, thereby informing the design of models for resource-constrained wearables.</p></sec><sec id="s4-2-3"><title>Model Generalization Across Mental Stressors</title><p>Most mental-stress classification studies evaluate models only on the same stressors used during training, while far fewer assess generalization to unseen stressors&#x2014;a critical capability for real-world deployment. Prior work often evaluates generalizability by training on one dataset and testing on another [<xref ref-type="bibr" rid="ref17">17</xref>-<xref ref-type="bibr" rid="ref19">19</xref>,<xref ref-type="bibr" rid="ref101">101</xref>]. While informative, such cross-dataset evaluations conflate multiple sources of variation&#x2014;individual demographics, sensor hardware, and stressor types&#x2014;making it challenging to attribute performance drops to any specific factor. In contrast, our leave-one-stressor-out evaluation isolates stressor novelty, enabling a more targeted assessment of a model&#x2019;s ability to detect MS under previously unseen conditions. As our leave-one-stressor-out experiments demonstrated, both LR and XGBoost could generalize to novel stressors, indicating the feasibility of ML models to adapt to unseen mental stimuli.</p><p>Taken together, our work advances MS detection from several perspectives. First, by leveraging an ECG dataset from 127 participants across 6 mental stressors and a diverse set of nonstress activities, our work addresses a more challenging, ecologically valid task: separating MS from a composite nonstress set that includes everyday activities, for which we demonstrate acceptable performance. 
Second, we demonstrate that unimodal ML ECG classifiers using 55 features are robust against downsampling from 1000 to 125 Hz, maintaining a performance of over 95% with just 10 features. This highlights the feasibility of lightweight and interpretable models for wearable deployment. Third, through our leave-one-stressor-out evaluation, we provide valuable insights into model generalization to unseen stressors, isolating the challenge of stressor novelty without confounds from demographic or hardware differences.</p></sec></sec><sec id="s4-3"><title>Limitations</title><p>Despite the results and insights presented in the paper, a few limitations warrant attention.</p><p>First, our cohort comprises Dutch-speaking young working adults (aged 18&#x2010;48 years). While our analytical framework is broadly applicable, physiological stress responses are highly individual and may vary across age groups, cultures, and health statuses.</p><p>Additionally, activity patterns might change over the lifespan [<xref ref-type="bibr" rid="ref102">102</xref>], further limiting generalizability. Importantly, our results do not directly generalize to older populations, given that HRV, HR reserve, and other time-domain features generally decline with age [<xref ref-type="bibr" rid="ref103">103</xref>], potentially making the classification problem more difficult. Given that distinguishing MS from cardiac responses evoked by physical activity proved very challenging despite our comparably young cohort, we anticipate that this challenge will persist (or even become more complicated) when considering an older population. An interesting direction for future work is therefore to validate our models in more diverse populations, such as older adults or non-Western cohorts, to ensure robustness and generalizability of our findings.</p><p>Second, we used global z-standardization across the training set to enable generalization to unseen users without requiring per-user calibration data. 
While this approach is attractive for real-world deployment, it does not account for the law of initial values [<xref ref-type="bibr" rid="ref104">104</xref>], that is, it ignores interindividual variability in physiological responses to mental stressors present in our study (<xref ref-type="fig" rid="figure2">Figure 2</xref>). Our global normalization scheme may therefore bias model predictions against specific physiological phenotypes, for instance, by increasing false-positive rates among individuals with elevated resting HR or reduced BL HRV. Future work could examine the extent to which global versus individual-aware normalization strategies systematically affect downstream model performance across different BL physiological characteristics.</p><p>Third, we rely solely on task labels rather than participants&#x2019; self-reports to assign MS conditions, which prevents us from confirming that every labeled stress window corresponds to actual perceived stress. Incorporating self-assessments of perceived stress could therefore have reduced label noise, particularly given the pronounced interindividual variability in physiological responses (<xref ref-type="fig" rid="figure2">Figure 2</xref> and <xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref>), which may partly account for the observed failure to generalize to the SSST stressor. Furthermore, instead of treating stress as a binary variable (stress or nonstress), one could use perceived ratings to classify the specific level or intensity of perceived MS [<xref ref-type="bibr" rid="ref105">105</xref>].</p><p>Fourth, a related limitation of this study concerns the labeling of recovery periods. Physiological recovery from stress is gradual rather than instantaneous, suggesting that segments labeled &#x201C;non-stress&#x201D; during recovery periods likely retain residual stress physiology, constituting class-dependent label noise [<xref ref-type="bibr" rid="ref106">106</xref>]. 
Empirically, label noise has been shown to adversely affect model performance across various ML approaches, including deep learning methods [<xref ref-type="bibr" rid="ref107">107</xref>-<xref ref-type="bibr" rid="ref109">109</xref>]. Our sensitivity analysis supports this observation: excluding recovery periods improved AUPRC for LR and XGBoost (see &#x201C;Model Performance under Simplified Conditions&#x201D;). We retained recovery periods as nonstress for comparability with prior stress-detection literature, which commonly assigns labels based on the protocol, and to provide conservative performance estimates. Yet, an alternative approach could either treat recovery as a buffer period excluded from training and evaluation or apply methods such as importance reweighting [<xref ref-type="bibr" rid="ref110">110</xref>] or weighted surrogate loss [<xref ref-type="bibr" rid="ref111">111</xref>].</p><p>Fifth, while our study uses ECG data to detect MS and downsamples the signal to mimic the hardware constraints of consumer-grade devices, such wearables typically rely on PPG. The most predictive features in our models are derived from R-peaks and beat-to-beat dynamics, which can also be extracted from PPG, suggesting that our approach may be transferable. However, this hypothesis remains to be validated empirically. Future work should therefore assess the extent to which our findings can be generalized to PPG-based settings.</p><p>Last, the MS tasks in our experimental setup were performed while participants remained seated. In everyday life, however, individuals often experience MS while standing or moving, for example, a teacher navigating a lively classroom. 
Future work should therefore extend the evaluation to simultaneous stress-and-activity scenarios to fully validate ambulatory MS detection, as done by Sun et al [<xref ref-type="bibr" rid="ref63">63</xref>], Hosseini et al [<xref ref-type="bibr" rid="ref112">112</xref>], and Kaczor et al [<xref ref-type="bibr" rid="ref113">113</xref>].</p></sec><sec id="s4-4"><title>Conclusions</title><p>This study evaluated ML models for distinguishing MS from routine physical activities using a single-sensor ECG. Both LR and XGBoost achieved acceptable discriminative performance (AUROC &#x003E;0.7), with XGBoost providing only marginal benefits over the linear BL (<italic>&#x0394;</italic>&#x003C;0.02), suggesting that simple, interpretable models can perform competitively. Performance remained robust when downsampling from 1000 to 125 Hz and reducing the feature set to 10, thereby supporting lightweight deployment. However, our findings reveal a critical limitation of single-sensor ECG approaches: although both models detected MS with acceptable to good sensitivity (XGBoost: 0.800; LR: 0.782), specificity was low (XGBoost: 0.512; LR: 0.509), particularly for MPA. This indicates that stress-induced cardiac responses cannot be reliably distinguished from those driven by physical exertion using ECG features alone. Generalization to unseen stressors was also stressor-dependent, with models performing well above chance for most stressors but near chance for the social-evaluative stimulus (SSST). Future studies should validate our findings in ambulatory settings where MS and physical activity can co-occur and explore both multimodal approaches and the use of PPG via wearables as a practical alternative to ECG for continuous stress monitoring.</p></sec></sec></body><back><ack><p>This article was written as part of the research project &#x201C;Stress in Action&#x201D;. We used artificial intelligence tools, specifically ChatGPT and Claude, to speed up coding and debugging. 
Furthermore, we used these tools to improve the syntax, clarity, and grammar of the written paragraphs. Importantly, all outputs were carefully checked and reviewed.</p></ack><notes><sec><title>Funding</title><p>Stress in Action is financially supported by the Dutch Research Council and the Dutch Ministry of Education, Culture, and Science (NWO gravitation grant number 024.005.010).</p></sec><sec><title>Data Availability</title><p>The dataset analyzed in this study is not publicly available, and a data access agreement is required to access and analyze the data. The code is publicly available on the GitHub repository [<xref ref-type="bibr" rid="ref75">75</xref>].</p></sec></notes><fn-group><fn fn-type="con"><p>BU contributed to the conceptualization, formal analysis, coding, methodology, and writing of the manuscript. AA contributed to formal analysis, coding, and investigation. SvdV and DJvdM provided consultation and contributed to the review and editing of the manuscript. EdG and MH provided supervision and also contributed to the review and editing of the manuscript.</p></fn><fn fn-type="conflict"><p>EdG owns shares in VU Ambulatory Monitoring Solutions Ltd. 
The remaining authors declare no competing interests.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">ANS</term><def><p>autonomic nervous system</p></def></def-item><def-item><term id="abb2">AUPRC</term><def><p>area under the precision-recall curve</p></def></def-item><def-item><term id="abb3">AUROC</term><def><p>area under the receiver operating characteristic curve</p></def></def-item><def-item><term id="abb4">AVNN</term><def><p>average normal-to-normal</p></def></def-item><def-item><term id="abb5">BL</term><def><p>baseline</p></def></def-item><def-item><term id="abb6">BPM</term><def><p>beats per minute</p></def></def-item><def-item><term id="abb7">ECG</term><def><p>electrocardiogram</p></def></def-item><def-item><term id="abb8">HF</term><def><p>high frequency</p></def></def-item><def-item><term id="abb9">HR</term><def><p>heart rate</p></def></def-item><def-item><term id="abb10">HRA</term><def><p>heart rate asymmetry</p></def></def-item><def-item><term id="abb11">HRV</term><def><p>heart rate variability</p></def></def-item><def-item><term id="abb12">Hz</term><def><p>hertz</p></def></def-item><def-item><term id="abb13">IALS</term><def><p>inverse average length of segments</p></def></def-item><def-item><term id="abb14">LPA</term><def><p>low physical activity</p></def></def-item><def-item><term id="abb15">LR</term><def><p>logistic regression</p></def></def-item><def-item><term id="abb16">ML</term><def><p>machine learning</p></def></def-item><def-item><term id="abb17">MMA</term><def><p>modified moving average</p></def></def-item><def-item><term id="abb18">MPA</term><def><p>moderate physical activity</p></def></def-item><def-item><term id="abb19">MS</term><def><p>mental stress</p></def></def-item><def-item><term id="abb20">NN</term><def><p>normal-to-normal</p></def></def-item><def-item><term id="abb21">PASAT</term><def><p>paced auditory serial addition task</p></def></def-item><def-item><term 
id="abb22">PPG</term><def><p>photoplethysmography</p></def></def-item><def-item><term id="abb23">PSS</term><def><p>percentage of short segments</p></def></def-item><def-item><term id="abb24">RAVEN</term><def><p>Raven&#x2019;s Progressive Matrices</p></def></def-item><def-item><term id="abb25">RF</term><def><p>random forest</p></def></def-item><def-item><term id="abb26">RMSSD</term><def><p>root-mean-square of successive differences</p></def></def-item><def-item><term id="abb27">SDNN</term><def><p>SD of the NN intervals</p></def></def-item><def-item><term id="abb28">SHAP</term><def><p>Shapley additive explanation</p></def></def-item><def-item><term id="abb29">SMOTE</term><def><p>synthetic minority oversampling technique</p></def></def-item><def-item><term id="abb30">SQI</term><def><p>signal quality index</p></def></def-item><def-item><term id="abb31">SSST</term><def><p>sing-a-song-stress test</p></def></def-item><def-item><term id="abb32">TA</term><def><p>tone avoidance</p></def></def-item><def-item><term id="abb33">TRIPOD+AI</term><def><p>Transparent Reporting of a Multivariable Prediction Model for Individual Prognosis or Diagnosis&#x2013;Artificial Intelligence</p></def></def-item><def-item><term id="abb34">TWA</term><def><p>T-wave alternans</p></def></def-item><def-item><term id="abb35">UHF</term><def><p>ultra-high frequency</p></def></def-item><def-item><term id="abb36">VHF</term><def><p>very high frequency</p></def></def-item><def-item><term id="abb37">XGBoost</term><def><p>extreme gradient boosting</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Razavi</surname><given-names>M</given-names> </name><name name-style="western"><surname>Ziyadidegan</surname><given-names>S</given-names> </name><name name-style="western"><surname>Mahmoudzadeh</surname><given-names>A</given-names> 
</name><etal/></person-group><article-title>Machine learning, deep learning, and data preprocessing techniques for detecting, predicting, and monitoring stress and stress-related mental disorders: scoping review</article-title><source>JMIR Ment Health</source><year>2024</year><month>08</month><day>21</day><volume>11</volume><fpage>e53714</fpage><pub-id pub-id-type="doi">10.2196/53714</pub-id><pub-id pub-id-type="medline">39167782</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Cohen</surname><given-names>S</given-names> </name><name name-style="western"><surname>Gianaros</surname><given-names>PJ</given-names> </name><name name-style="western"><surname>Manuck</surname><given-names>SB</given-names> </name></person-group><article-title>A stage model of stress and disease</article-title><source>Perspect Psychol Sci</source><year>2016</year><month>07</month><volume>11</volume><issue>4</issue><fpage>456</fpage><lpage>463</lpage><pub-id pub-id-type="doi">10.1177/1745691616646305</pub-id><pub-id pub-id-type="medline">27474134</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Le Fevre</surname><given-names>M</given-names> </name><name name-style="western"><surname>Matheny</surname><given-names>J</given-names> </name><name name-style="western"><surname>Kolt</surname><given-names>GS</given-names> </name></person-group><article-title>Eustress, distress, and interpretation in occupational stress</article-title><source>J Manag Psychol</source><year>2003</year><month>11</month><day>1</day><volume>18</volume><issue>7</issue><fpage>726</fpage><lpage>744</lpage><pub-id pub-id-type="doi">10.1108/02683940310502412</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group 
person-group-type="author"><name name-style="western"><surname>Cohen</surname><given-names>S</given-names> </name><name name-style="western"><surname>Janicki-Deverts</surname><given-names>D</given-names> </name><name name-style="western"><surname>Miller</surname><given-names>GE</given-names> </name></person-group><article-title>Psychological stress and disease</article-title><source>JAMA</source><year>2007</year><month>10</month><day>10</day><volume>298</volume><issue>14</issue><fpage>1685</fpage><lpage>1687</lpage><pub-id pub-id-type="doi">10.1001/jama.298.14.1685</pub-id><pub-id pub-id-type="medline">17925521</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hammen</surname><given-names>C</given-names> </name></person-group><article-title>Stress and depression</article-title><source>Annu Rev Clin Psychol</source><year>2005</year><volume>1</volume><issue>293-319</issue><fpage>293</fpage><lpage>319</lpage><pub-id pub-id-type="doi">10.1146/annurev.clinpsy.1.102803.143938</pub-id><pub-id pub-id-type="medline">17716090</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kivim&#x00E4;ki</surname><given-names>M</given-names> </name><name name-style="western"><surname>Steptoe</surname><given-names>A</given-names> </name></person-group><article-title>Effects of stress on the development and progression of cardiovascular disease</article-title><source>Nat Rev Cardiol</source><year>2018</year><month>04</month><volume>15</volume><issue>4</issue><fpage>215</fpage><lpage>229</lpage><pub-id pub-id-type="doi">10.1038/nrcardio.2017.189</pub-id><pub-id pub-id-type="medline">29213140</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>J&#x00F6;nsson</surname><given-names>P</given-names> </name><name name-style="western"><surname>Wallerg&#x00E5;rd</surname><given-names>M</given-names> </name><name name-style="western"><surname>Osterberg</surname><given-names>K</given-names> </name><name name-style="western"><surname>Hansen</surname><given-names>AM</given-names> </name><name name-style="western"><surname>Johansson</surname><given-names>G</given-names> </name><name name-style="western"><surname>Karlson</surname><given-names>B</given-names> </name></person-group><article-title>Cardiovascular and cortisol reactivity and habituation to a virtual reality version of the trier social stress test: a pilot study</article-title><source>Psychoneuroendocrinology</source><year>2010</year><month>10</month><volume>35</volume><issue>9</issue><fpage>1397</fpage><lpage>1403</lpage><pub-id pub-id-type="doi">10.1016/j.psyneuen.2010.04.003</pub-id><pub-id pub-id-type="medline">20451329</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Cohen</surname><given-names>S</given-names> </name><name name-style="western"><surname>Kamarck</surname><given-names>T</given-names> </name><name name-style="western"><surname>Mermelstein</surname><given-names>R</given-names> </name></person-group><article-title>A global measure of perceived stress</article-title><source>J Health Soc Behav</source><year>1983</year><month>12</month><volume>24</volume><issue>4</issue><fpage>385</fpage><lpage>396</lpage><pub-id pub-id-type="medline">6668417</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Martinez</surname><given-names>GJ</given-names> </name><name name-style="western"><surname>Grover</surname><given-names>T</given-names> </name><name 
name-style="western"><surname>Mattingly</surname><given-names>SM</given-names> </name><etal/></person-group><article-title>Alignment between heart rate variability from fitness trackers and perceived stress: perspectives from a large-scale in situ longitudinal study of information workers</article-title><source>JMIR Hum Factors</source><year>2022</year><month>08</month><day>4</day><volume>9</volume><issue>3</issue><fpage>e33754</fpage><pub-id pub-id-type="doi">10.2196/33754</pub-id><pub-id pub-id-type="medline">35925662</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Choi</surname><given-names>J</given-names> </name><name name-style="western"><surname>Gutierrez-Osuna</surname><given-names>R</given-names> </name></person-group><article-title>Using heart rate monitors to detect mental stress</article-title><year>2009</year><conf-name>Proceedings of the 2009 Sixth International Workshop on Wearable and Implantable Body Sensor Networks</conf-name><conf-date>Jun 3-5, 2009</conf-date></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Won</surname><given-names>E</given-names> </name><name name-style="western"><surname>Kim</surname><given-names>YK</given-names> </name></person-group><article-title>Stress, the autonomic nervous system, and the immune-kynurenine pathway in the etiology of depression</article-title><source>Curr Neuropharmacol</source><year>2016</year><volume>14</volume><issue>7</issue><fpage>665</fpage><lpage>673</lpage><pub-id pub-id-type="doi">10.2174/1570159x14666151208113006</pub-id><pub-id pub-id-type="medline">27640517</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Jordan</surname><given-names>MI</given-names> </name><name name-style="western"><surname>Mitchell</surname><given-names>TM</given-names> </name></person-group><article-title>Machine learning: trends, perspectives, and prospects</article-title><source>Science</source><year>2015</year><month>07</month><day>17</day><volume>349</volume><issue>6245</issue><fpage>255</fpage><lpage>260</lpage><pub-id pub-id-type="doi">10.1126/science.aaa8415</pub-id><pub-id pub-id-type="medline">26185243</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Schmidt</surname><given-names>P</given-names> </name><name name-style="western"><surname>Reiss</surname><given-names>A</given-names> </name><name name-style="western"><surname>Duerichen</surname><given-names>R</given-names> </name><name name-style="western"><surname>Marberger</surname><given-names>C</given-names> </name><name name-style="western"><surname>Van Laerhoven</surname><given-names>K</given-names> </name></person-group><article-title>Introducing WESAD, a multimodal dataset for wearable stress and affect detection</article-title><conf-name>ICMI &#x2019;18: Proceedings of the 20th ACM International Conference on Multimodal Interaction</conf-name><conf-date>Oct 16-20, 2018</conf-date><conf-loc>Boulder CO USA</conf-loc><fpage>400</fpage><lpage>408</lpage><pub-id pub-id-type="doi">10.1145/3242969.3242985</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Koldijk</surname><given-names>S</given-names> </name><name name-style="western"><surname>Sappelli</surname><given-names>M</given-names> </name><name name-style="western"><surname>Verberne</surname><given-names>S</given-names> </name><name 
name-style="western"><surname>Neerincx</surname><given-names>MA</given-names> </name><name name-style="western"><surname>Kraaij</surname><given-names>W</given-names> </name></person-group><article-title>The swell knowledge work dataset for stress and user modeling research</article-title><conf-name>ICMI &#x2019;14: Proceedings of the 16th International Conference on Multimodal Interaction</conf-name><conf-date>Nov 12-16, 2014</conf-date><pub-id pub-id-type="doi">10.1145/2663204.2663257</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Li</surname><given-names>RA</given-names> </name><name name-style="western"><surname>Liu</surname><given-names>Z</given-names> </name></person-group><article-title>Stress detection using deep neural networks</article-title><source>BMC Med Inform Decis Mak</source><year>2020</year><month>12</month><volume>20</volume><issue>S11</issue><pub-id pub-id-type="doi">10.1186/s12911-020-01299-4</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zhu</surname><given-names>L</given-names> </name><name name-style="western"><surname>Spachos</surname><given-names>P</given-names> </name><name name-style="western"><surname>Ng</surname><given-names>PC</given-names> </name><etal/></person-group><article-title>Stress detection through wrist-based electrodermal activity monitoring and machine learning</article-title><source>IEEE J Biomed Health Inform</source><year>2023</year><volume>27</volume><issue>5</issue><fpage>2155</fpage><lpage>2165</lpage><pub-id pub-id-type="doi">10.1109/JBHI.2023.3239305</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name 
name-style="western"><surname>Prajod</surname><given-names>P</given-names> </name><name name-style="western"><surname>Mahesh</surname><given-names>B</given-names> </name><name name-style="western"><surname>Andr&#x00E9;</surname><given-names>E</given-names> </name></person-group><article-title>Stressor type matters! &#x2014; exploring factors influencing cross-dataset generalizability of physiological stress detection</article-title><conf-name>ICMI &#x2019;24: Proceedings of the 26th International Conference on Multimodal Interaction</conf-name><conf-date>Nov 4-8, 2024</conf-date><conf-loc>San Jose Costa Rica</conf-loc><fpage>508</fpage><lpage>517</lpage><pub-id pub-id-type="doi">10.1145/3678957.3685738</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Mishra</surname><given-names>V</given-names> </name><name name-style="western"><surname>Sen</surname><given-names>S</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>G</given-names> </name><etal/></person-group><article-title>Evaluating the reproducibility of physiological stress detection models</article-title><source>Proc ACM Interact Mob Wearable Ubiquitous Technol</source><year>2020</year><month>12</month><volume>4</volume><issue>4</issue><fpage>147</fpage><pub-id pub-id-type="doi">10.1145/3432220</pub-id><pub-id pub-id-type="medline">36189150</pub-id></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Cho</surname><given-names>HM</given-names> </name><name name-style="western"><surname>Park</surname><given-names>H</given-names> </name><name name-style="western"><surname>Dong</surname><given-names>SY</given-names> </name><name name-style="western"><surname>Youn</surname><given-names>I</given-names> </name></person-group><article-title>Ambulatory 
and laboratory stress detection based on raw electrocardiogram signals using a convolutional neural network</article-title><source>Sensors (Basel)</source><year>2019</year><month>10</month><day>11</day><volume>19</volume><issue>20</issue><fpage>4408</fpage><pub-id pub-id-type="doi">10.3390/s19204408</pub-id><pub-id pub-id-type="medline">31614646</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>van der Mee</surname><given-names>DJ</given-names> </name><name name-style="western"><surname>Gevonden</surname><given-names>MJ</given-names> </name><name name-style="western"><surname>Westerink</surname><given-names>J</given-names> </name><name name-style="western"><surname>de Geus</surname><given-names>EJC</given-names> </name></person-group><article-title>Validity of electrodermal activity-based measures of sympathetic nervous system activity from a wrist-worn device</article-title><source>Int J Psychophysiol</source><year>2021</year><month>10</month><volume>168</volume><fpage>52</fpage><lpage>64</lpage><pub-id pub-id-type="doi">10.1016/j.ijpsycho.2021.08.003</pub-id><pub-id pub-id-type="medline">34418464</pub-id></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Collins</surname><given-names>GS</given-names> </name><name name-style="western"><surname>Reitsma</surname><given-names>JB</given-names> </name><name name-style="western"><surname>Altman</surname><given-names>DG</given-names> </name><name name-style="western"><surname>Moons</surname><given-names>KGM</given-names> </name></person-group><article-title>Transparent reporting of a multivariable prediction model for individual prognosis or diagnosis (TRIPOD): the TRIPOD statement</article-title><source>BMC 
Med</source><year>2015</year><month>01</month><day>6</day><volume>13</volume><issue>1</issue><fpage>1</fpage><pub-id pub-id-type="doi">10.1186/s12916-014-0241-z</pub-id><pub-id pub-id-type="medline">25563062</pub-id></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Collins</surname><given-names>GS</given-names> </name><name name-style="western"><surname>Moons</surname><given-names>KGM</given-names> </name><name name-style="western"><surname>Dhiman</surname><given-names>P</given-names> </name><etal/></person-group><article-title>TRIPOD+AI statement: updated guidance for reporting clinical prediction models that use regression or machine learning methods</article-title><source>BMJ</source><year>2024</year><volume>385</volume><fpage>e078378</fpage><pub-id pub-id-type="doi">10.1136/bmj-2023-078378</pub-id></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Myin-Germeys</surname><given-names>I</given-names> </name><name name-style="western"><surname>van Os</surname><given-names>J</given-names> </name><name name-style="western"><surname>Schwartz</surname><given-names>JE</given-names> </name><name name-style="western"><surname>Stone</surname><given-names>AA</given-names> </name><name name-style="western"><surname>Delespaul</surname><given-names>PA</given-names> </name></person-group><article-title>Emotional reactivity to daily life stress in psychosis</article-title><source>Arch Gen Psychiatry</source><year>2001</year><month>12</month><volume>58</volume><issue>12</issue><fpage>1137</fpage><lpage>1144</lpage><pub-id pub-id-type="doi">10.1001/archpsyc.58.12.1137</pub-id><pub-id pub-id-type="medline">11735842</pub-id></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="journal"><person-group 
person-group-type="author"><name name-style="western"><surname>Makowski</surname><given-names>D</given-names> </name><name name-style="western"><surname>Pham</surname><given-names>T</given-names> </name><name name-style="western"><surname>Lau</surname><given-names>ZJ</given-names> </name><etal/></person-group><article-title>NeuroKit2: a Python toolbox for neurophysiological signal processing</article-title><source>Behav Res Methods</source><year>2021</year><month>08</month><volume>53</volume><issue>4</issue><fpage>1689</fpage><lpage>1696</lpage><pub-id pub-id-type="doi">10.3758/s13428-020-01516-y</pub-id><pub-id pub-id-type="medline">33528817</pub-id></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bernardes</surname><given-names>A</given-names> </name><name name-style="western"><surname>Couceiro</surname><given-names>R</given-names> </name><name name-style="western"><surname>Medeiros</surname><given-names>J</given-names> </name><etal/></person-group><article-title>How reliable are ultra-short-term HRV measurements during cognitively demanding tasks?</article-title><source>Sensors (Basel)</source><year>2022</year><month>08</month><day>30</day><volume>22</volume><issue>17</issue><fpage>6528</fpage><pub-id pub-id-type="doi">10.3390/s22176528</pub-id><pub-id pub-id-type="medline">36080987</pub-id></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Smets</surname><given-names>E</given-names> </name><name name-style="western"><surname>Casale</surname><given-names>P</given-names> </name><name name-style="western"><surname>Gro&#x00DF;ekath&#x00F6;fer</surname><given-names>U</given-names> </name><name name-style="western"><surname>Lamichhane</surname><given-names>B</given-names> </name><name 
name-style="western"><surname>Raedt</surname><given-names>W</given-names> </name><name name-style="western"><surname>Bogaerts</surname><given-names>K</given-names> </name></person-group><article-title>Comparison of Machine Learning Techniques for Psychophysiological Stress Detection</article-title><source>Pervasive Computing Paradigms for Mental Health</source><year>2015</year><publisher-name>Springer International Publishing</publisher-name><pub-id pub-id-type="doi">10.1007/978-3-319-32270-4_2</pub-id></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kaegi</surname><given-names>DM</given-names> </name><name name-style="western"><surname>Halamek</surname><given-names>LP</given-names> </name><name name-style="western"><surname>Van Hare</surname><given-names>GF</given-names> </name><name name-style="western"><surname>Howard</surname><given-names>SK</given-names> </name><name name-style="western"><surname>Dubin</surname><given-names>AM</given-names> </name></person-group><article-title>Effect of mental stress on heart rate variability: validation of simulated operating and delivery room training modules</article-title><source>Pediatr Res</source><year>1999</year><month>04</month><volume>45</volume><issue>4, Part 2 of 2</issue><fpage>77A</fpage><lpage>77A</lpage><pub-id pub-id-type="doi">10.1203/00006450-199904020-00463</pub-id></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kim</surname><given-names>HG</given-names> </name><name name-style="western"><surname>Cheon</surname><given-names>EJ</given-names> </name><name name-style="western"><surname>Bai</surname><given-names>DS</given-names> </name><name name-style="western"><surname>Lee</surname><given-names>YH</given-names> </name><name name-style="western"><surname>Koo</surname><given-names>BH</given-names> 
</name></person-group><article-title>Stress and heart rate variability: a meta-analysis and review of the literature</article-title><source>Psychiatry Investig</source><year>2018</year><month>03</month><volume>15</volume><issue>3</issue><fpage>235</fpage><lpage>245</lpage><pub-id pub-id-type="doi">10.30773/pi.2017.08.17</pub-id></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kleiger</surname><given-names>RE</given-names> </name><name name-style="western"><surname>Stein</surname><given-names>PK</given-names> </name><name name-style="western"><surname>Bosner</surname><given-names>MS</given-names> </name><name name-style="western"><surname>Rottman</surname><given-names>JN</given-names> </name></person-group><article-title>Time domain measurements of heart rate variability</article-title><source>Cardiol Clin</source><year>1992</year><month>08</month><volume>10</volume><issue>3</issue><fpage>487</fpage><lpage>498</lpage><pub-id pub-id-type="doi">10.1016/s0733-8651(18)30230-3</pub-id><pub-id pub-id-type="medline">1504980</pub-id></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Dalmeida</surname><given-names>KM</given-names> </name><name name-style="western"><surname>Masala</surname><given-names>GL</given-names> </name></person-group><article-title>HRV features as viable physiological markers for stress detection using wearable devices</article-title><source>Sensors (Basel)</source><year>2021</year><month>04</month><day>19</day><volume>21</volume><issue>8</issue><fpage>2873</fpage><pub-id pub-id-type="doi">10.3390/s21082873</pub-id><pub-id pub-id-type="medline">33921884</pub-id></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Wang</surname><given-names>HM</given-names> </name><name name-style="western"><surname>Huang</surname><given-names>SC</given-names> </name></person-group><article-title>SDNN/RMSSD as a surrogate for LF/HF: a revised investigation</article-title><source>Model Simul Eng</source><year>2012</year><volume>2012</volume><fpage>1</fpage><lpage>8</lpage><pub-id pub-id-type="doi">10.1155/2012/931943</pub-id></nlm-citation></ref><ref id="ref32"><label>32</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sollers</surname><given-names>JJ</given-names>  <suffix>3rd</suffix></name><name name-style="western"><surname>Buchanan</surname><given-names>TW</given-names> </name><name name-style="western"><surname>Mowrer</surname><given-names>SM</given-names> </name><name name-style="western"><surname>Hill</surname><given-names>LK</given-names> </name><name name-style="western"><surname>Thayer</surname><given-names>JF</given-names> </name></person-group><article-title>Comparison of the ratio of the standard deviation of the R-R interval and the root mean squared successive differences (SD/rMSSD) to the low frequency-to-high frequency (LF/HF) ratio in a patient population and normal healthy controls</article-title><source>Biomed Sci Instrum</source><year>2007</year><volume>43</volume><fpage>158</fpage><lpage>163</lpage><pub-id pub-id-type="medline">17487074</pub-id></nlm-citation></ref><ref id="ref33"><label>33</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Salahuddin</surname><given-names>L</given-names> </name><name name-style="western"><surname>Cho</surname><given-names>J</given-names> </name><name name-style="western"><surname>Jeong</surname><given-names>MG</given-names> </name><name name-style="western"><surname>Kim</surname><given-names>D</given-names> </name></person-group><article-title>Ultra short term 
analysis of heart rate variability for monitoring mental stress in mobile settings</article-title><source>Annu Int Conf IEEE Eng Med Biol Soc</source><year>2007</year><volume>2007</volume><fpage>4656</fpage><lpage>4659</lpage><pub-id pub-id-type="doi">10.1109/IEMBS.2007.4353378</pub-id><pub-id pub-id-type="medline">18003044</pub-id></nlm-citation></ref><ref id="ref34"><label>34</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Pereira</surname><given-names>T</given-names> </name><name name-style="western"><surname>Almeida</surname><given-names>PR</given-names> </name><name name-style="western"><surname>Cunha</surname><given-names>JPS</given-names> </name><name name-style="western"><surname>Aguiar</surname><given-names>A</given-names> </name></person-group><article-title>Heart rate variability metrics for fine-grained stress level assessment</article-title><source>Comput Methods Programs Biomed</source><year>2017</year><month>09</month><volume>148</volume><fpage>71</fpage><lpage>80</lpage><pub-id pub-id-type="doi">10.1016/j.cmpb.2017.06.018</pub-id><pub-id pub-id-type="medline">28774440</pub-id></nlm-citation></ref><ref id="ref35"><label>35</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Tipton</surname><given-names>MJ</given-names> </name><name name-style="western"><surname>Harper</surname><given-names>A</given-names> </name><name name-style="western"><surname>Paton</surname><given-names>JFR</given-names> </name><name name-style="western"><surname>Costello</surname><given-names>JT</given-names> </name></person-group><article-title>The human ventilatory response to stress: rate or depth?</article-title><source>J Physiol (Lond)</source><year>2017</year><month>09</month><volume>595</volume><issue>17</issue><fpage>5729</fpage><lpage>5752</lpage><pub-id pub-id-type="doi">10.1113/JP274596</pub-id></nlm-citation></ref><ref 
id="ref36"><label>36</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Masaoka</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Homma</surname><given-names>I</given-names> </name></person-group><article-title>Anxiety and respiratory patterns: their relationship during mental stress and physical load</article-title><source>Int J Psychophysiol</source><year>1997</year><month>09</month><volume>27</volume><issue>2</issue><fpage>153</fpage><lpage>159</lpage><pub-id pub-id-type="doi">10.1016/s0167-8760(97)00052-4</pub-id><pub-id pub-id-type="medline">9342646</pub-id></nlm-citation></ref><ref id="ref37"><label>37</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Masaoka</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Homma</surname><given-names>I</given-names> </name></person-group><article-title>The effect of anticipatory anxiety on breathing and metabolism in humans</article-title><source>Respir Physiol</source><year>2001</year><month>11</month><day>1</day><volume>128</volume><issue>2</issue><fpage>171</fpage><lpage>177</lpage><pub-id pub-id-type="doi">10.1016/s0034-5687(01)00278-x</pub-id><pub-id pub-id-type="medline">11812381</pub-id></nlm-citation></ref><ref id="ref38"><label>38</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hernando</surname><given-names>A</given-names> </name><name name-style="western"><surname>Lazaro</surname><given-names>J</given-names> </name><name name-style="western"><surname>Gil</surname><given-names>E</given-names> </name><etal/></person-group><article-title>Inclusion of respiratory frequency information in heart rate variability analysis for stress assessment</article-title><source>IEEE J Biomed Health 
Inform</source><year>2016</year><month>07</month><volume>20</volume><issue>4</issue><fpage>1016</fpage><lpage>1025</lpage><pub-id pub-id-type="doi">10.1109/JBHI.2016.2553578</pub-id><pub-id pub-id-type="medline">27093713</pub-id></nlm-citation></ref><ref id="ref39"><label>39</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Karthikeyan</surname><given-names>P</given-names> </name><name name-style="western"><surname>Murugappan</surname><given-names>M</given-names> </name><name name-style="western"><surname>Yaacob</surname><given-names>S</given-names> </name></person-group><article-title>A study on mental arithmetic task based human stress level classification using discrete wavelet transform</article-title><conf-name>2012 IEEE conference on sustainable utilization and development in engineering and technology (STUDENT2012)</conf-name><conf-date>Oct 6-9, 2012</conf-date><pub-id pub-id-type="doi">10.1109/STUDENT.2012.6408369</pub-id></nlm-citation></ref><ref id="ref40"><label>40</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Webber</surname><given-names>CL</given-names>  <suffix>Jr</suffix></name><name name-style="western"><surname>Zbilut</surname><given-names>JP</given-names> </name></person-group><article-title>Dynamical assessment of physiological systems and states using recurrence plot strategies</article-title><source>J Appl Physiol</source><year>1994</year><month>02</month><day>1</day><volume>76</volume><issue>2</issue><fpage>965</fpage><lpage>973</lpage><pub-id pub-id-type="doi">10.1152/jappl.1994.76.2.965</pub-id></nlm-citation></ref><ref id="ref41"><label>41</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Tanev</surname><given-names>G</given-names> </name><name name-style="western"><surname>Saadi</surname><given-names>DB</given-names> 
</name><name name-style="western"><surname>Hoppe</surname><given-names>K</given-names> </name><name name-style="western"><surname>Sorensen</surname><given-names>HBD</given-names> </name></person-group><article-title>Classification of acute stress using linear and non-linear heart rate variability analysis derived from sternal ECG</article-title><conf-name>2014 36th Annual International Conference of the IEEE Engineering in Medicine and Biology Society (EMBC)</conf-name><conf-date>Aug 26-30, 2014</conf-date><pub-id pub-id-type="doi">10.1109/EMBC.2014.6944349</pub-id></nlm-citation></ref><ref id="ref42"><label>42</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Pincus</surname><given-names>SM</given-names> </name></person-group><article-title>Approximate entropy as a measure of system complexity</article-title><source>Proc Natl Acad Sci USA</source><year>1991</year><month>03</month><day>15</day><volume>88</volume><issue>6</issue><fpage>2297</fpage><lpage>2301</lpage><pub-id pub-id-type="doi">10.1073/pnas.88.6.2297</pub-id></nlm-citation></ref><ref id="ref43"><label>43</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Richman</surname><given-names>JS</given-names> </name><name name-style="western"><surname>Moorman</surname><given-names>JR</given-names> </name></person-group><article-title>Physiological time-series analysis using approximate entropy and sample entropy</article-title><source>Am J Physiol Heart Circ Physiol</source><year>2000</year><month>06</month><volume>278</volume><issue>6</issue><fpage>H2039</fpage><lpage>H2049</lpage><pub-id pub-id-type="doi">10.1152/ajpheart.2000.278.6.H2039</pub-id><pub-id pub-id-type="medline">10843903</pub-id></nlm-citation></ref><ref id="ref44"><label>44</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Chen</surname><given-names>W</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Xie</surname><given-names>H</given-names> </name><name name-style="western"><surname>Yu</surname><given-names>W</given-names> </name></person-group><article-title>Characterization of surface EMG signal based on fuzzy entropy</article-title><source>IEEE Trans Neural Syst Rehabil Eng</source><year>2007</year><month>06</month><volume>15</volume><issue>2</issue><fpage>266</fpage><lpage>272</lpage><pub-id pub-id-type="doi">10.1109/TNSRE.2007.897025</pub-id><pub-id pub-id-type="medline">17601197</pub-id></nlm-citation></ref><ref id="ref45"><label>45</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Pham</surname><given-names>T</given-names> </name><name name-style="western"><surname>Lau</surname><given-names>ZJ</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>SHA</given-names> </name><name name-style="western"><surname>Makowski</surname><given-names>D</given-names> </name></person-group><article-title>Heart rate variability in psychology: a review of HRV indices and an analysis tutorial</article-title><source>Sensors (Basel)</source><year>2021</year><month>06</month><day>9</day><volume>21</volume><issue>12</issue><fpage>3998</fpage><pub-id pub-id-type="doi">10.3390/s21123998</pub-id><pub-id pub-id-type="medline">34207927</pub-id></nlm-citation></ref><ref id="ref46"><label>46</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Humeau-Heurtier</surname><given-names>A</given-names> </name></person-group><article-title>Evaluation of systems&#x2019; irregularity and complexity: sample entropy, its derivatives, and their applications across scales and disciplines</article-title><source>Entropy 
(Basel)</source><year>2018</year><month>10</month><day>16</day><volume>20</volume><issue>10</issue><fpage>794</fpage><pub-id pub-id-type="doi">10.3390/e20100794</pub-id><pub-id pub-id-type="medline">33265881</pub-id></nlm-citation></ref><ref id="ref47"><label>47</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Parvaneh</surname><given-names>S</given-names> </name><name name-style="western"><surname>Toosizadeh</surname><given-names>N</given-names> </name><name name-style="western"><surname>Moharreri</surname><given-names>S</given-names> </name></person-group><person-group person-group-type="editor"><name name-style="western"><surname>Parvaneh</surname><given-names>S</given-names> </name><name name-style="western"><surname>Toosizadeh</surname><given-names>N</given-names> </name><name name-style="western"><surname>Moharreri</surname><given-names>S</given-names> </name></person-group><article-title>Impact of mental stress on heart rate asymmetry</article-title><conf-name>2015 Computing in Cardiology Conference (CinC)</conf-name><conf-date>Sep 6-9, 2015</conf-date><pub-id pub-id-type="doi">10.1109/CIC.2015.7411030</pub-id></nlm-citation></ref><ref id="ref48"><label>48</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Yan</surname><given-names>C</given-names> </name><name name-style="western"><surname>Li</surname><given-names>P</given-names> </name><name name-style="western"><surname>Ji</surname><given-names>L</given-names> </name><name name-style="western"><surname>Yao</surname><given-names>L</given-names> </name><name name-style="western"><surname>Karmakar</surname><given-names>C</given-names> </name><name name-style="western"><surname>Liu</surname><given-names>C</given-names> </name></person-group><article-title>Area asymmetry of heart rate variability signal</article-title><source>Biomed Eng 
Online</source><year>2017</year><month>09</month><day>21</day><volume>16</volume><issue>1</issue><fpage>112</fpage><pub-id pub-id-type="doi">10.1186/s12938-017-0402-3</pub-id><pub-id pub-id-type="medline">28934961</pub-id></nlm-citation></ref><ref id="ref49"><label>49</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Javorka</surname><given-names>M</given-names> </name><name name-style="western"><surname>Turianikova</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Tonhajzerova</surname><given-names>I</given-names> </name><name name-style="western"><surname>Javorka</surname><given-names>K</given-names> </name><name name-style="western"><surname>Baumert</surname><given-names>M</given-names> </name></person-group><article-title>The effect of orthostasis on recurrence quantification analysis of heart rate and blood pressure dynamics</article-title><source>Physiol Meas</source><year>2009</year><month>01</month><volume>30</volume><issue>1</issue><fpage>29</fpage><lpage>41</lpage><pub-id pub-id-type="doi">10.1088/0967-3334/30/1/003</pub-id><pub-id pub-id-type="medline">19039163</pub-id></nlm-citation></ref><ref id="ref50"><label>50</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Dimitriev</surname><given-names>D</given-names> </name><name name-style="western"><surname>Saperova</surname><given-names>EV</given-names> </name><name name-style="western"><surname>Dimitriev</surname><given-names>A</given-names> </name><name name-style="western"><surname>Karpenko</surname><given-names>Y</given-names> </name></person-group><article-title>Recurrence quantification analysis of heart rate during mental arithmetic stress in young females</article-title><source>Front Physiol</source><year>2020</year><volume>11</volume><issue>40</issue><fpage>40</fpage><pub-id 
pub-id-type="doi">10.3389/fphys.2020.00040</pub-id><pub-id pub-id-type="medline">32116754</pub-id></nlm-citation></ref><ref id="ref51"><label>51</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Peng</surname><given-names>CK</given-names> </name><name name-style="western"><surname>Havlin</surname><given-names>S</given-names> </name><name name-style="western"><surname>Stanley</surname><given-names>HE</given-names> </name><name name-style="western"><surname>Goldberger</surname><given-names>AL</given-names> </name></person-group><article-title>Quantification of scaling exponents and crossover phenomena in nonstationary heartbeat time series</article-title><source>Chaos</source><year>1995</year><month>03</month><day>1</day><volume>5</volume><issue>1</issue><fpage>82</fpage><lpage>87</lpage><pub-id pub-id-type="doi">10.1063/1.166141</pub-id></nlm-citation></ref><ref id="ref52"><label>52</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Tulppo</surname><given-names>MP</given-names> </name><name name-style="western"><surname>M&#x00E4;kikallio</surname><given-names>TH</given-names> </name><name name-style="western"><surname>Sepp&#x00E4;nen</surname><given-names>T</given-names> </name><etal/></person-group><article-title>Effects of pharmacological adrenergic and vagal modulation on fractal heart rate dynamics</article-title><source>Clin Physiol</source><year>2001</year><month>09</month><day>24</day><volume>21</volume><issue>5</issue><fpage>515</fpage><lpage>523</lpage><pub-id pub-id-type="doi">10.1046/j.1365-2281.2001.00344.x</pub-id></nlm-citation></ref><ref id="ref53"><label>53</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Immanuel</surname><given-names>S</given-names> </name><name 
name-style="western"><surname>Teferra</surname><given-names>MN</given-names> </name><name name-style="western"><surname>Baumert</surname><given-names>M</given-names> </name><name name-style="western"><surname>Bidargaddi</surname><given-names>N</given-names> </name></person-group><article-title>Heart rate variability for evaluating psychological stress changes in healthy adults: a scoping review</article-title><source>Neuropsychobiology</source><year>2023</year><volume>82</volume><issue>4</issue><fpage>187</fpage><lpage>202</lpage><pub-id pub-id-type="doi">10.1159/000530376</pub-id><pub-id pub-id-type="medline">37290411</pub-id></nlm-citation></ref><ref id="ref54"><label>54</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Goya-Esteban</surname><given-names>R</given-names> </name><name name-style="western"><surname>Barquero-P&#x00E9;rez</surname><given-names>O</given-names> </name><name name-style="western"><surname>Sarabia-Cachadina</surname><given-names>E</given-names> </name><name name-style="western"><surname>Naranjo-Orellana</surname><given-names>J</given-names> </name><name name-style="western"><surname>Rojo-Alvarez</surname><given-names>JL</given-names> </name></person-group><article-title>Heart rate variability non linear dynamics in intense exercise</article-title><year>2012</year><conf-name>Computing in Cardiology</conf-name><conf-date>Sep 9-12, 2012</conf-date></nlm-citation></ref><ref id="ref55"><label>55</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Costa</surname><given-names>MD</given-names> </name><name name-style="western"><surname>Davis</surname><given-names>RB</given-names> </name><name name-style="western"><surname>Goldberger</surname><given-names>AL</given-names> </name></person-group><article-title>Heart rate fragmentation: a new approach to the analysis of cardiac interbeat interval 
dynamics</article-title><source>Front Physiol</source><year>2017</year><month>05</month><volume>8</volume><pub-id pub-id-type="doi">10.3389/fphys.2017.00255</pub-id></nlm-citation></ref><ref id="ref56"><label>56</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lampert</surname><given-names>R</given-names> </name></person-group><article-title>ECG signatures of psychological stress</article-title><source>J Electrocardiol</source><year>2015</year><volume>48</volume><issue>6</issue><fpage>1000</fpage><lpage>1005</lpage><pub-id pub-id-type="doi">10.1016/j.jelectrocard.2015.08.005</pub-id><pub-id pub-id-type="medline">26364755</pub-id></nlm-citation></ref><ref id="ref57"><label>57</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Aro</surname><given-names>AL</given-names> </name><name name-style="western"><surname>Kentt&#x00E4;</surname><given-names>TV</given-names> </name><name name-style="western"><surname>Huikuri</surname><given-names>HV</given-names> </name></person-group><article-title>Microvolt T-wave alternans: where are we now?</article-title><source>Arrhythm Electrophysiol Rev</source><year>2016</year><month>05</month><volume>5</volume><issue>1</issue><fpage>37</fpage><lpage>40</lpage><pub-id pub-id-type="doi">10.15420/aer.2015.28.1</pub-id><pub-id pub-id-type="medline">27403292</pub-id></nlm-citation></ref><ref id="ref58"><label>58</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Nearing</surname><given-names>BD</given-names> </name><name name-style="western"><surname>Verrier</surname><given-names>RL</given-names> </name></person-group><article-title>Modified moving average analysis of T-wave alternans to predict ventricular fibrillation with high accuracy</article-title><source>J Appl Physiol 
(1985)</source><year>2002</year><month>02</month><volume>92</volume><issue>2</issue><fpage>541</fpage><lpage>549</lpage><pub-id pub-id-type="doi">10.1152/japplphysiol.00592.2001</pub-id><pub-id pub-id-type="medline">11796662</pub-id></nlm-citation></ref><ref id="ref59"><label>59</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Chen</surname><given-names>T</given-names> </name><name name-style="western"><surname>Guestrin</surname><given-names>C</given-names> </name></person-group><article-title>XGBoost: a scalable tree boosting system</article-title><conf-name>Proceedings of the 22nd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining</conf-name><conf-date>Aug 13-17, 2016</conf-date><conf-loc>San Francisco, California, USA</conf-loc><fpage>785</fpage><lpage>794</lpage></nlm-citation></ref><ref id="ref60"><label>60</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Amann</surname><given-names>J</given-names> </name><name name-style="western"><surname>Blasimme</surname><given-names>A</given-names> </name><name name-style="western"><surname>Vayena</surname><given-names>E</given-names> </name><name name-style="western"><surname>Frey</surname><given-names>D</given-names> </name><name name-style="western"><surname>Madai</surname><given-names>VI</given-names> </name><collab>Precise4Q consortium</collab></person-group><article-title>Explainability for artificial intelligence in healthcare: a multidisciplinary perspective</article-title><source>BMC Med Inform Decis Mak</source><year>2020</year><month>11</month><day>30</day><volume>20</volume><issue>1</issue><fpage>310</fpage><pub-id pub-id-type="doi">10.1186/s12911-020-01332-6</pub-id><pub-id pub-id-type="medline">33256715</pub-id></nlm-citation></ref><ref id="ref61"><label>61</label><nlm-citation citation-type="confproc"><person-group 
person-group-type="author"><name name-style="western"><surname>Chaptoukaev</surname><given-names>H</given-names> </name><name name-style="western"><surname>Strizhkova</surname><given-names>V</given-names> </name><name name-style="western"><surname>Panariello</surname><given-names>M</given-names> </name><etal/></person-group><article-title>StressID: a multimodal dataset for stress identification</article-title><conf-name>Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track</conf-name><conf-date>Dec 10-16, 2023</conf-date></nlm-citation></ref><ref id="ref62"><label>62</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Heimerl</surname><given-names>A</given-names> </name><name name-style="western"><surname>Prajod</surname><given-names>P</given-names> </name><name name-style="western"><surname>Mertes</surname><given-names>S</given-names> </name><etal/></person-group><article-title>The ForDigitStress dataset: a multi-modal dataset for automatic stress recognition</article-title><source>IEEE Trans Affective Comput</source><volume>16</volume><issue>2</issue><fpage>1219</fpage><lpage>1234</lpage><pub-id pub-id-type="doi">10.1109/TAFFC.2024.3501400</pub-id></nlm-citation></ref><ref id="ref63"><label>63</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Sun</surname><given-names>FT</given-names> </name><name name-style="western"><surname>Kuo</surname><given-names>C</given-names> </name><name name-style="western"><surname>Cheng</surname><given-names>HT</given-names> </name><name name-style="western"><surname>Buthpitiya</surname><given-names>S</given-names> </name><name name-style="western"><surname>Collins</surname><given-names>P</given-names> </name><name name-style="western"><surname>Griss</surname><given-names>M</given-names> </name></person-group><article-title>Activity-aware mental 
stress detection using physiological sensors</article-title><conf-name>Mobile Computing, Applications, and Services MobiCASE 2010</conf-name><conf-date>Oct 25-28, 2010</conf-date></nlm-citation></ref><ref id="ref64"><label>64</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Grinsztajn</surname><given-names>L</given-names> </name><name name-style="western"><surname>Oyallon</surname><given-names>E</given-names> </name><name name-style="western"><surname>Varoquaux</surname><given-names>G</given-names> </name></person-group><article-title>Why do tree-based models still outperform deep learning on typical tabular data?</article-title><conf-name>Proceedings of the 36th International Conference on Neural Information Processing Systems</conf-name><conf-date>Nov 28 to Dec 3, 2022</conf-date><conf-loc>New Orleans, Louisiana, United States</conf-loc><fpage>507</fpage><lpage>520</lpage></nlm-citation></ref><ref id="ref65"><label>65</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Shwartz-Ziv</surname><given-names>R</given-names> </name><name name-style="western"><surname>Armon</surname><given-names>A</given-names> </name></person-group><article-title>Tabular data: deep learning is not all you need</article-title><source>Information Fusion</source><year>2022</year><month>05</month><volume>81</volume><issue>C</issue><fpage>84</fpage><lpage>90</lpage><pub-id pub-id-type="doi">10.1016/j.inffus.2021.11.011</pub-id></nlm-citation></ref><ref id="ref66"><label>66</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Schallmoser</surname><given-names>S</given-names> </name><name name-style="western"><surname>Zueger</surname><given-names>T</given-names> </name><name name-style="western"><surname>Kraus</surname><given-names>M</given-names> </name><name 
name-style="western"><surname>Saar-Tsechansky</surname><given-names>M</given-names> </name><name name-style="western"><surname>Stettler</surname><given-names>C</given-names> </name><name name-style="western"><surname>Feuerriegel</surname><given-names>S</given-names> </name></person-group><article-title>Machine learning for predicting micro- and macrovascular complications in individuals with prediabetes or diabetes: retrospective cohort study</article-title><source>J Med Internet Res</source><year>2023</year><volume>25</volume><fpage>e42181</fpage><pub-id pub-id-type="doi">10.2196/42181</pub-id></nlm-citation></ref><ref id="ref67"><label>67</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Healey</surname><given-names>JA</given-names> </name><name name-style="western"><surname>Picard</surname><given-names>RW</given-names> </name></person-group><article-title>Detecting stress during real-world driving tasks using physiological sensors</article-title><source>IEEE Trans Intell Transport Syst</source><year>2005</year><month>06</month><volume>6</volume><issue>2</issue><fpage>156</fpage><lpage>166</lpage><pub-id pub-id-type="doi">10.1109/TITS.2005.848368</pub-id></nlm-citation></ref><ref id="ref68"><label>68</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Saygin</surname><given-names>M</given-names> </name><name name-style="western"><surname>Schoenmakers</surname><given-names>M</given-names> </name><name name-style="western"><surname>Gevonden</surname><given-names>M</given-names> </name><name name-style="western"><surname>de Geus</surname><given-names>E</given-names> </name></person-group><article-title>Speech detection via respiratory inductance plethysmography, thoracic impedance, accelerometers, and gyroscopes: a machine learning-informed comparative 
study</article-title><source>Psychophysiology</source><year>2025</year><month>02</month><volume>62</volume><issue>2</issue><fpage>e70021</fpage><pub-id pub-id-type="doi">10.1111/psyp.70021</pub-id><pub-id pub-id-type="medline">39950497</pub-id></nlm-citation></ref><ref id="ref69"><label>69</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Lundberg</surname><given-names>SM</given-names> </name><name name-style="western"><surname>Lee</surname><given-names>SI</given-names> </name></person-group><article-title>A unified approach to interpreting model predictions</article-title><conf-name>Proceedings of the 31st International Conference on Neural Information Processing Systems</conf-name><conf-date>Dec 4-9, 2017</conf-date><conf-loc>Long Beach, California, United States</conf-loc><fpage>4768</fpage><lpage>4777</lpage></nlm-citation></ref><ref id="ref70"><label>70</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Alba</surname><given-names>AC</given-names> </name><name name-style="western"><surname>Agoritsas</surname><given-names>T</given-names> </name><name name-style="western"><surname>Walsh</surname><given-names>M</given-names> </name><etal/></person-group><article-title>Discrimination and calibration of clinical prediction models: users&#x2019; guides to the medical literature</article-title><source>JAMA</source><year>2017</year><month>10</month><day>10</day><volume>318</volume><issue>14</issue><fpage>1377</fpage><lpage>1384</lpage><pub-id pub-id-type="doi">10.1001/jama.2017.12126</pub-id><pub-id pub-id-type="medline">29049590</pub-id></nlm-citation></ref><ref id="ref71"><label>71</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Huang</surname><given-names>Y</given-names> </name><name 
name-style="western"><surname>Li</surname><given-names>W</given-names> </name><name name-style="western"><surname>Macheret</surname><given-names>F</given-names> </name><name name-style="western"><surname>Gabriel</surname><given-names>RA</given-names> </name><name name-style="western"><surname>Ohno-Machado</surname><given-names>L</given-names> </name></person-group><article-title>A tutorial on calibration measurements and calibration models for clinical prediction models</article-title><source>J Am Med Inform Assoc</source><year>2020</year><month>04</month><day>1</day><volume>27</volume><issue>4</issue><fpage>621</fpage><lpage>633</lpage><pub-id pub-id-type="doi">10.1093/jamia/ocz228</pub-id><pub-id pub-id-type="medline">32106284</pub-id></nlm-citation></ref><ref id="ref72"><label>72</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Brier</surname><given-names>GW</given-names> </name></person-group><article-title>Verification of forecasts expressed in terms of probability</article-title><source>Mon Wea Rev</source><year>1950</year><month>01</month><volume>78</volume><issue>1</issue><fpage>1</fpage><lpage>3</lpage><pub-id pub-id-type="doi">10.1175/1520-0493(1950)078&#x003C;0001:VOFEIT&#x003E;2.0.CO;2</pub-id></nlm-citation></ref><ref id="ref73"><label>73</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Maiga</surname><given-names>A</given-names> </name><name name-style="western"><surname>Farjah</surname><given-names>F</given-names> </name><name name-style="western"><surname>Blume</surname><given-names>J</given-names> </name><etal/></person-group><article-title>Risk prediction in clinical practice: a practical guide for cardiothoracic surgeons</article-title><source>Ann Thorac Surg</source><year>2019</year><month>11</month><volume>108</volume><issue>5</issue><fpage>1573</fpage><lpage>1582</lpage><pub-id 
pub-id-type="doi">10.1016/j.athoracsur.2019.04.126</pub-id><pub-id pub-id-type="medline">31255609</pub-id></nlm-citation></ref><ref id="ref74"><label>74</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Pearson</surname><given-names>K</given-names> </name></person-group><article-title>LIII. On lines and planes of closest fit to systems of points in space</article-title><source>Lond Edinb Dubl Phil Mag J Sci</source><year>1901</year><month>11</month><volume>2</volume><issue>11</issue><fpage>559</fpage><lpage>572</lpage><pub-id pub-id-type="doi">10.1080/14786440109462720</pub-id></nlm-citation></ref><ref id="ref75"><label>75</label><nlm-citation citation-type="web"><person-group person-group-type="author"><name name-style="western"><surname>Uendes</surname><given-names>B</given-names> </name><name name-style="western"><surname>Antonides</surname><given-names>A</given-names> </name></person-group><article-title>Stress detection ECG</article-title><source>GitHub</source><year>2025</year><month>07</month><access-date>2025-07-11</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://github.com/BuelentUendes/Stress_Detection_ECG">https://github.com/BuelentUendes/Stress_Detection_ECG</ext-link></comment></nlm-citation></ref><ref id="ref76"><label>76</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Chawla</surname><given-names>NV</given-names> </name><name name-style="western"><surname>Bowyer</surname><given-names>KW</given-names> </name><name name-style="western"><surname>Hall</surname><given-names>LO</given-names> </name><name name-style="western"><surname>Kegelmeyer</surname><given-names>WP</given-names> </name></person-group><article-title>SMOTE: synthetic minority over-sampling technique</article-title><source>J Artif Intell 
Res</source><year>2002</year><volume>16</volume><issue>1</issue><fpage>321</fpage><lpage>357</lpage><pub-id pub-id-type="doi">10.1613/jair.953</pub-id></nlm-citation></ref><ref id="ref77"><label>77</label><nlm-citation citation-type="web"><article-title>Your research: subject to the WMO or not?</article-title><source>Central Committee on Research Involving Human Subjects</source><access-date>2026-03-22</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://english.ccmo.nl/investigators/legal-framework-for-medical-scientific-research/your-research-is-it-subject-to-the-wmo-or-not">https://english.ccmo.nl/investigators/legal-framework-for-medical-scientific-research/your-research-is-it-subject-to-the-wmo-or-not</ext-link></comment></nlm-citation></ref><ref id="ref78"><label>78</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>DeLong</surname><given-names>ER</given-names> </name><name name-style="western"><surname>DeLong</surname><given-names>DM</given-names> </name><name name-style="western"><surname>Clarke-Pearson</surname><given-names>DL</given-names> </name></person-group><article-title>Comparing the areas under two or more correlated receiver operating characteristic curves: a nonparametric approach</article-title><source>Biometrics</source><year>1988</year><month>09</month><volume>44</volume><issue>3</issue><fpage>837</fpage><lpage>845</lpage><pub-id pub-id-type="doi">10.2307/2531595</pub-id><pub-id pub-id-type="medline">3203132</pub-id></nlm-citation></ref><ref id="ref79"><label>79</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Obuchowski</surname><given-names>NA</given-names> </name></person-group><article-title>Nonparametric analysis of clustered ROC curve 
data</article-title><source>Biometrics</source><year>1997</year><month>06</month><volume>53</volume><issue>2</issue><fpage>567</fpage><lpage>578</lpage><pub-id pub-id-type="doi">10.2307/2533958</pub-id><pub-id pub-id-type="medline">9192452</pub-id></nlm-citation></ref><ref id="ref80"><label>80</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Breiman</surname><given-names>L</given-names> </name></person-group><article-title>Statistical modeling: the two cultures (with comments and a rejoinder by the author)</article-title><source>Statist Sci</source><year>2001</year><month>08</month><volume>16</volume><issue>3</issue><fpage>199</fpage><lpage>231</lpage><pub-id pub-id-type="doi">10.1214/ss/1009213726</pub-id></nlm-citation></ref><ref id="ref81"><label>81</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Rudin</surname><given-names>C</given-names> </name><name name-style="western"><surname>Zhong</surname><given-names>C</given-names> </name><name name-style="western"><surname>Semenova</surname><given-names>L</given-names> </name><name name-style="western"><surname>Seltzer</surname><given-names>M</given-names> </name><name name-style="western"><surname>Parr</surname><given-names>R</given-names> </name><name name-style="western"><surname>Liu</surname><given-names>J</given-names> </name><etal/></person-group><person-group person-group-type="editor"><name name-style="western"><surname>Salakhutdinov</surname><given-names>R</given-names> </name><name name-style="western"><surname>Kolter</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Heller</surname><given-names>K</given-names> </name><name name-style="western"><surname>Weller</surname><given-names>A</given-names> </name><name name-style="western"><surname>Oliver</surname><given-names>N</given-names> </name><name 
name-style="western"><surname>Scarlett</surname><given-names>J</given-names> </name></person-group><article-title>Position: amazing things come from having many good models</article-title><conf-name>Proceedings of the 41st International Conference on Machine Learning</conf-name><conf-date>Jul 21-27, 2024</conf-date><conf-loc>Vienna, Austria</conf-loc><fpage>42783</fpage><lpage>42795</lpage></nlm-citation></ref><ref id="ref82"><label>82</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Semenova</surname><given-names>L</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>H</given-names> </name><name name-style="western"><surname>Parr</surname><given-names>R</given-names> </name><name name-style="western"><surname>Rudin</surname><given-names>C</given-names> </name></person-group><article-title>A path to simpler models starts with noise</article-title><source>Adv Neural Inf Process Syst</source><year>2023</year><month>12</month><volume>36</volume><fpage>3362</fpage><lpage>3401</lpage><pub-id pub-id-type="medline">38577617</pub-id></nlm-citation></ref><ref id="ref83"><label>83</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hwang</surname><given-names>B</given-names> </name><name name-style="western"><surname>You</surname><given-names>J</given-names> </name><name name-style="western"><surname>Vaessen</surname><given-names>T</given-names> </name><name name-style="western"><surname>Myin-Germeys</surname><given-names>I</given-names> </name><name name-style="western"><surname>Park</surname><given-names>C</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>BT</given-names> </name></person-group><article-title>Deep ECGNet: an optimal deep learning framework for monitoring mental stress using ultra short-term ECG signals</article-title><source>Telemed J E 
Health</source><year>2018</year><month>10</month><volume>24</volume><issue>10</issue><fpage>753</fpage><lpage>772</lpage><pub-id pub-id-type="doi">10.1089/tmj.2017.0250</pub-id><pub-id pub-id-type="medline">29420125</pub-id></nlm-citation></ref><ref id="ref84"><label>84</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zhang</surname><given-names>P</given-names> </name><name name-style="western"><surname>Li</surname><given-names>F</given-names> </name><name name-style="western"><surname>Zhao</surname><given-names>R</given-names> </name><etal/></person-group><article-title>Real-time psychological stress detection according to ECG using deep learning</article-title><source>Appl Sci (Basel)</source><year>2021</year><volume>11</volume><issue>9</issue><fpage>3838</fpage><pub-id pub-id-type="doi">10.3390/app11093838</pub-id></nlm-citation></ref><ref id="ref85"><label>85</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Crabb&#x00E9;</surname><given-names>J</given-names> </name><name name-style="western"><surname>van der Schaar</surname><given-names>M</given-names> </name></person-group><person-group person-group-type="editor"><name name-style="western"><surname>Meila</surname><given-names>M</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>T</given-names> </name></person-group><article-title>Explaining time series predictions with dynamic masks</article-title><conf-name>Proceedings of the 38th International Conference on Machine Learning</conf-name><conf-date>Jul 18-24, 2021</conf-date><conf-loc>Virtual / Online</conf-loc><fpage>2166</fpage><lpage>2177</lpage></nlm-citation></ref><ref id="ref86"><label>86</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Enguehard</surname><given-names>J</given-names> 
</name></person-group><person-group person-group-type="editor"><name name-style="western"><surname>Krause</surname><given-names>A</given-names> </name><name name-style="western"><surname>Brunskill</surname><given-names>E</given-names> </name><name name-style="western"><surname>Cho</surname><given-names>K</given-names> </name><name name-style="western"><surname>Engelhardt</surname><given-names>B</given-names> </name><name name-style="western"><surname>Sabato</surname><given-names>S</given-names> </name><name name-style="western"><surname>Scarlett</surname><given-names>J</given-names> </name></person-group><article-title>Learning perturbations to explain time series predictions</article-title><conf-name>Proceedings of the 40th International Conference on Machine Learning</conf-name><conf-date>Jul 23-29, 2023</conf-date><conf-loc>Honolulu, Hawaii, United States</conf-loc><fpage>9329</fpage><lpage>9342</lpage></nlm-citation></ref><ref id="ref87"><label>87</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Chand</surname><given-names>K</given-names> </name><name name-style="western"><surname>Chandra</surname><given-names>S</given-names> </name><name name-style="western"><surname>Dutt</surname><given-names>V</given-names> </name></person-group><article-title>A comprehensive evaluation of linear and non-linear HRV parameters between paced breathing and stressful mental state</article-title><source>Heliyon</source><year>2024</year><month>06</month><volume>10</volume><issue>11</issue><fpage>e32195</fpage><pub-id pub-id-type="doi">10.1016/j.heliyon.2024.e32195</pub-id></nlm-citation></ref><ref id="ref88"><label>88</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Taoum</surname><given-names>A</given-names> </name><name name-style="western"><surname>Bisiaux</surname><given-names>A</given-names> </name><name 
name-style="western"><surname>Tilquin</surname><given-names>F</given-names> </name><name name-style="western"><surname>Le Guillou</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Carrault</surname><given-names>G</given-names> </name></person-group><article-title>Validity of ultra-short-term HRV analysis using PPG&#x2014;a preliminary study</article-title><source>Sensors (Basel)</source><year>2022</year><month>10</month><day>20</day><volume>22</volume><issue>20</issue><fpage>7995</fpage><pub-id pub-id-type="doi">10.3390/s22207995</pub-id></nlm-citation></ref><ref id="ref89"><label>89</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Maeda</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Sekine</surname><given-names>M</given-names> </name><name name-style="western"><surname>Tamura</surname><given-names>T</given-names> </name></person-group><article-title>Relationship between measurement site and motion artifacts in wearable reflected photoplethysmography</article-title><source>J Med Syst</source><year>2011</year><month>10</month><volume>35</volume><issue>5</issue><fpage>969</fpage><lpage>976</lpage><pub-id pub-id-type="doi">10.1007/s10916-010-9505-0</pub-id></nlm-citation></ref><ref id="ref90"><label>90</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gedam</surname><given-names>S</given-names> </name><name name-style="western"><surname>Paul</surname><given-names>S</given-names> </name></person-group><article-title>A review on mental stress detection using wearable sensors and machine learning techniques</article-title><source>IEEE Access</source><year>2021</year><volume>9</volume><fpage>84045</fpage><lpage>84066</lpage><pub-id pub-id-type="doi">10.1109/ACCESS.2021.3085502</pub-id></nlm-citation></ref><ref id="ref91"><label>91</label><nlm-citation 
citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sharma</surname><given-names>N</given-names> </name><name name-style="western"><surname>Gedeon</surname><given-names>T</given-names> </name></person-group><article-title>Objective measures, sensors and computational techniques for stress recognition and classification: a survey</article-title><source>Comput Methods Programs Biomed</source><year>2012</year><month>12</month><volume>108</volume><issue>3</issue><fpage>1287</fpage><lpage>1301</lpage><pub-id pub-id-type="doi">10.1016/j.cmpb.2012.07.003</pub-id><pub-id pub-id-type="medline">22921417</pub-id></nlm-citation></ref><ref id="ref92"><label>92</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Can</surname><given-names>YS</given-names> </name><name name-style="western"><surname>Arnrich</surname><given-names>B</given-names> </name><name name-style="western"><surname>Ersoy</surname><given-names>C</given-names> </name></person-group><article-title>Stress detection in daily life scenarios using smart phones and wearable sensors: a survey</article-title><source>J Biomed Inform</source><year>2019</year><month>04</month><volume>92</volume><fpage>103139</fpage><pub-id pub-id-type="doi">10.1016/j.jbi.2019.103139</pub-id></nlm-citation></ref><ref id="ref93"><label>93</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Pataca</surname><given-names>AO</given-names> </name><name name-style="western"><surname>Zdravevski</surname><given-names>E</given-names> </name><name name-style="western"><surname>Coelho</surname><given-names>PJ</given-names> </name><etal/></person-group><article-title>Use of machine learning for predicting stress episodes based on wearable sensor data: a systematic review</article-title><source>Comput Biol 
Med</source><year>2025</year><month>11</month><volume>198</volume><fpage>111166</fpage><pub-id pub-id-type="doi">10.1016/j.compbiomed.2025.111166</pub-id></nlm-citation></ref><ref id="ref94"><label>94</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kirschbaum</surname><given-names>C</given-names> </name><name name-style="western"><surname>Pirke</surname><given-names>KM</given-names> </name><name name-style="western"><surname>Hellhammer</surname><given-names>DH</given-names> </name></person-group><article-title>The &#x2018;Trier Social Stress Test&#x2019; &#x2013; a tool for investigating psychobiological stress responses in a laboratory setting</article-title><source>Neuropsychobiology</source><year>1993</year><volume>28</volume><issue>1-2</issue><fpage>76</fpage><lpage>81</lpage><pub-id pub-id-type="doi">10.1159/000119004</pub-id></nlm-citation></ref><ref id="ref95"><label>95</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Stroop</surname><given-names>JR</given-names> </name></person-group><article-title>Studies of interference in serial verbal reactions</article-title><source>J Exp Psychol</source><year>1935</year><volume>18</volume><issue>6</issue><fpage>643</fpage><lpage>662</lpage><pub-id pub-id-type="doi">10.1037/h0054651</pub-id></nlm-citation></ref><ref id="ref96"><label>96</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Dickerson</surname><given-names>SS</given-names> </name><name name-style="western"><surname>Kemeny</surname><given-names>ME</given-names> </name></person-group><article-title>Acute stressors and cortisol responses: a theoretical integration and synthesis of laboratory research</article-title><source>Psychol 
Bull</source><year>2004</year><month>05</month><volume>130</volume><issue>3</issue><fpage>355</fpage><lpage>391</lpage><pub-id pub-id-type="doi">10.1037/0033-2909.130.3.355</pub-id></nlm-citation></ref><ref id="ref97"><label>97</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Epel</surname><given-names>ES</given-names> </name><name name-style="western"><surname>Crosswell</surname><given-names>AD</given-names> </name><name name-style="western"><surname>Mayer</surname><given-names>SE</given-names> </name><etal/></person-group><article-title>More than a feeling: a unified view of stress measurement for population science</article-title><source>Front Neuroendocrinol</source><year>2018</year><month>04</month><volume>49</volume><fpage>146</fpage><lpage>169</lpage><pub-id pub-id-type="doi">10.1016/j.yfrne.2018.03.001</pub-id><pub-id pub-id-type="medline">29551356</pub-id></nlm-citation></ref><ref id="ref98"><label>98</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Vos</surname><given-names>G</given-names> </name><name name-style="western"><surname>Trinh</surname><given-names>K</given-names> </name><name name-style="western"><surname>Sarnyai</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Rahimi Azghadi</surname><given-names>M</given-names> </name></person-group><article-title>Generalizable machine learning for stress monitoring from wearable devices: a systematic literature review</article-title><source>Int J Med Inform</source><year>2023</year><month>05</month><volume>173</volume><fpage>105026</fpage><pub-id pub-id-type="doi">10.1016/j.ijmedinf.2023.105026</pub-id><pub-id pub-id-type="medline">36893657</pub-id></nlm-citation></ref><ref id="ref99"><label>99</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Smets</surname><given-names>E</given-names> </name><name name-style="western"><surname>Rios Velazquez</surname><given-names>E</given-names> </name><name name-style="western"><surname>Schiavone</surname><given-names>G</given-names> </name><etal/></person-group><article-title>Large-scale wearable data reveal digital phenotypes for daily-life stress detection</article-title><source>npj Digital Med</source><year>2018</year><volume>1</volume><issue>1</issue><fpage>67</fpage><pub-id pub-id-type="doi">10.1038/s41746-018-0074-9</pub-id></nlm-citation></ref><ref id="ref100"><label>100</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Huang</surname><given-names>S</given-names> </name><name name-style="western"><surname>Li</surname><given-names>J</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>P</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>W</given-names> </name></person-group><article-title>Detection of mental fatigue state with wearable ECG devices</article-title><source>Int J Med Inform</source><year>2018</year><month>11</month><volume>119</volume><fpage>39</fpage><lpage>46</lpage><pub-id pub-id-type="doi">10.1016/j.ijmedinf.2018.08.010</pub-id></nlm-citation></ref><ref id="ref101"><label>101</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Han</surname><given-names>HJ</given-names> </name><name name-style="western"><surname>Labbaf</surname><given-names>S</given-names> </name><name name-style="western"><surname>Borelli</surname><given-names>JL</given-names> </name><name name-style="western"><surname>Dutt</surname><given-names>N</given-names> </name><name name-style="western"><surname>Rahmani</surname><given-names>AM</given-names> </name></person-group><article-title>Objective stress monitoring based on wearable sensors in 
everyday settings</article-title><source>J Med Eng Technol</source><year>2020</year><month>05</month><volume>44</volume><issue>4</issue><fpage>177</fpage><lpage>189</lpage><pub-id pub-id-type="doi">10.1080/03091902.2020.1759707</pub-id><pub-id pub-id-type="medline">32589065</pub-id></nlm-citation></ref><ref id="ref102"><label>102</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Weber</surname><given-names>C</given-names> </name><name name-style="western"><surname>Quintus</surname><given-names>M</given-names> </name><name name-style="western"><surname>Egloff</surname><given-names>B</given-names> </name><name name-style="western"><surname>Luong</surname><given-names>G</given-names> </name><name name-style="western"><surname>Riediger</surname><given-names>M</given-names> </name><name name-style="western"><surname>Wrzus</surname><given-names>C</given-names> </name></person-group><article-title>Same old, same old? Age differences in the diversity of daily life</article-title><source>Psychol Aging</source><year>2020</year><month>05</month><volume>35</volume><issue>3</issue><fpage>434</fpage><lpage>448</lpage><pub-id pub-id-type="doi">10.1037/pag0000407</pub-id><pub-id pub-id-type="medline">31613136</pub-id></nlm-citation></ref><ref id="ref103"><label>103</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Umetani</surname><given-names>K</given-names> </name><name name-style="western"><surname>Singer</surname><given-names>DH</given-names> </name><name name-style="western"><surname>McCraty</surname><given-names>R</given-names> </name><name name-style="western"><surname>Atkinson</surname><given-names>M</given-names> </name></person-group><article-title>Twenty-four hour time domain heart rate variability and heart rate: relations to age and gender over nine decades</article-title><source>J Am Coll 
Cardiol</source><year>1998</year><month>03</month><day>1</day><volume>31</volume><issue>3</issue><fpage>593</fpage><lpage>601</lpage><pub-id pub-id-type="doi">10.1016/s0735-1097(97)00554-8</pub-id><pub-id pub-id-type="medline">9502641</pub-id></nlm-citation></ref><ref id="ref104"><label>104</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wilder</surname><given-names>J</given-names> </name></person-group><article-title>The law of initial value in neurology and psychiatry</article-title><source>J Nerv Ment Dis</source><year>1957</year><month>01</month><volume>125</volume><issue>1</issue><fpage>73</fpage><lpage>86</lpage><pub-id pub-id-type="doi">10.1097/00005053-195701000-00009</pub-id></nlm-citation></ref><ref id="ref105"><label>105</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Benchekroun</surname><given-names>M</given-names> </name><name name-style="western"><surname>Velmovitsky</surname><given-names>PE</given-names> </name><name name-style="western"><surname>Istrate</surname><given-names>D</given-names> </name><name name-style="western"><surname>Zalc</surname><given-names>V</given-names> </name><name name-style="western"><surname>Morita</surname><given-names>PP</given-names> </name><name name-style="western"><surname>Lenne</surname><given-names>D</given-names> </name></person-group><article-title>Cross dataset analysis for generalizability of HRV-based stress detection models</article-title><source>Sensors (Basel)</source><year>2023</year><month>02</month><day>6</day><volume>23</volume><issue>4</issue><fpage>1807</fpage><pub-id pub-id-type="doi">10.3390/s23041807</pub-id><pub-id pub-id-type="medline">36850407</pub-id></nlm-citation></ref><ref id="ref106"><label>106</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Karimi</surname><given-names>D</given-names> </name><name name-style="western"><surname>Dou</surname><given-names>H</given-names> </name><name name-style="western"><surname>Warfield</surname><given-names>SK</given-names> </name><name name-style="western"><surname>Gholipour</surname><given-names>A</given-names> </name></person-group><article-title>Deep learning with noisy labels: exploring techniques and remedies in medical image analysis</article-title><source>Med Image Anal</source><year>2020</year><month>10</month><volume>65</volume><fpage>101759</fpage><pub-id pub-id-type="doi">10.1016/j.media.2020.101759</pub-id><pub-id pub-id-type="medline">32623277</pub-id></nlm-citation></ref><ref id="ref107"><label>107</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ding</surname><given-names>C</given-names> </name><name name-style="western"><surname>Pereira</surname><given-names>T</given-names> </name><name name-style="western"><surname>Xiao</surname><given-names>R</given-names> </name><name name-style="western"><surname>Lee</surname><given-names>RJ</given-names> </name><name name-style="western"><surname>Hu</surname><given-names>X</given-names> </name></person-group><article-title>Impact of label noise on the learning based models for a binary classification of physiological signal</article-title><source>Sensors (Basel)</source><year>2022</year><volume>22</volume><issue>19</issue><fpage>7166</fpage><pub-id pub-id-type="doi">10.3390/s22197166</pub-id></nlm-citation></ref><ref id="ref108"><label>108</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Song</surname><given-names>H</given-names> </name><name name-style="western"><surname>Kim</surname><given-names>M</given-names> </name><name name-style="western"><surname>Park</surname><given-names>D</given-names> </name><name 
name-style="western"><surname>Shin</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Lee</surname><given-names>JG</given-names> </name></person-group><article-title>Learning from noisy labels with deep neural networks: a survey</article-title><source>IEEE Trans Neural Netw Learn Syst</source><year>2023</year><volume>34</volume><issue>11</issue><fpage>8135</fpage><lpage>8153</lpage><pub-id pub-id-type="doi">10.1109/TNNLS.2022.3152527</pub-id></nlm-citation></ref><ref id="ref109"><label>109</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Fr&#x00E9;nay</surname><given-names>B</given-names> </name><name name-style="western"><surname>Verleysen</surname><given-names>M</given-names> </name></person-group><article-title>Classification in the presence of label noise: a survey</article-title><source>IEEE Trans Neural Netw Learn Syst</source><year>2014</year><month>05</month><volume>25</volume><issue>5</issue><fpage>845</fpage><lpage>869</lpage><pub-id pub-id-type="doi">10.1109/TNNLS.2013.2292894</pub-id><pub-id pub-id-type="medline">24808033</pub-id></nlm-citation></ref><ref id="ref110"><label>110</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Liu</surname><given-names>T</given-names> </name><name name-style="western"><surname>Tao</surname><given-names>D</given-names> </name></person-group><article-title>Classification with noisy labels by importance reweighting</article-title><source>IEEE Trans Pattern Anal Mach Intell</source><year>2016</year><volume>38</volume><issue>3</issue><fpage>447</fpage><lpage>461</lpage><pub-id pub-id-type="doi">10.1109/TPAMI.2015.2456899</pub-id></nlm-citation></ref><ref id="ref111"><label>111</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Natarajan</surname><given-names>N</given-names> 
</name><name name-style="western"><surname>Dhillon</surname><given-names>IS</given-names> </name><name name-style="western"><surname>Ravikumar</surname><given-names>P</given-names> </name><name name-style="western"><surname>Tewari</surname><given-names>A</given-names> </name></person-group><article-title>Learning with noisy labels</article-title><conf-name>Proceedings of the 27th International Conference on Neural Information Processing Systems</conf-name><conf-date>Dec 5-10, 2013</conf-date><conf-loc>Lake Tahoe, Nevada, United States</conf-loc><fpage>1196</fpage><lpage>1204</lpage></nlm-citation></ref><ref id="ref112"><label>112</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hosseini</surname><given-names>S</given-names> </name><name name-style="western"><surname>Gottumukkala</surname><given-names>R</given-names> </name><name name-style="western"><surname>Katragadda</surname><given-names>S</given-names> </name><etal/></person-group><article-title>A multimodal sensor dataset for continuous stress detection of nurses in a hospital</article-title><source>Sci Data</source><year>2022</year><month>06</month><day>1</day><volume>9</volume><issue>1</issue><fpage>255</fpage><pub-id pub-id-type="doi">10.1038/s41597-022-01361-y</pub-id><pub-id pub-id-type="medline">35650267</pub-id></nlm-citation></ref><ref id="ref113"><label>113</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kaczor</surname><given-names>EE</given-names> </name><name name-style="western"><surname>Carreiro</surname><given-names>S</given-names> </name><name name-style="western"><surname>Stapp</surname><given-names>J</given-names> </name><name name-style="western"><surname>Chapman</surname><given-names>B</given-names> </name><name name-style="western"><surname>Indic</surname><given-names>P</given-names> </name></person-group><article-title>Objective measurement of 
physician stress in the emergency department using a wearable sensor</article-title><source>Proc Annu Hawaii Int Conf Syst Sci</source><year>2020</year><volume>2020</volume><fpage>3729</fpage><lpage>3738</lpage><pub-id pub-id-type="medline">32015695</pub-id></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>Overview of features.</p><media xlink:href="jmir_v28i1e80450_app1.pdf" xlink:title="PDF File, 53 KB"/></supplementary-material><supplementary-material id="app2"><label>Multimedia Appendix 2</label><p>Hyperparameter settings.</p><media xlink:href="jmir_v28i1e80450_app2.pdf" xlink:title="PDF File, 26 KB"/></supplementary-material><supplementary-material id="app3"><label>Multimedia Appendix 3</label><p>Boxplots of positive and negative affect.</p><media xlink:href="jmir_v28i1e80450_app3.pdf" xlink:title="PDF File, 448 KB"/></supplementary-material><supplementary-material id="app4"><label>Multimedia Appendix 4</label><p>Area under the precision-recall curve classification performance.</p><media xlink:href="jmir_v28i1e80450_app4.pdf" xlink:title="PDF File, 133 KB"/></supplementary-material><supplementary-material id="app5"><label>Multimedia Appendix 5</label><p>CIs for performance differences between XGBoost and LR.</p><media xlink:href="jmir_v28i1e80450_app5.pdf" xlink:title="PDF File, 40 KB"/></supplementary-material><supplementary-material id="app6"><label>Multimedia Appendix 6</label><p>Additional classification performance results.</p><media xlink:href="jmir_v28i1e80450_app6.pdf" xlink:title="PDF File, 685 KB"/></supplementary-material><supplementary-material id="app7"><label>Multimedia Appendix 7</label><p>Seated baseline classification performance.</p><media xlink:href="jmir_v28i1e80450_app7.pdf" xlink:title="PDF File, 358 KB"/></supplementary-material><supplementary-material id="app8"><label>Multimedia Appendix 8</label><p>Time window analysis.</p><media xlink:href="jmir_v28i1e80450_app8.pdf" 
xlink:title="PDF File, 339 KB"/></supplementary-material><supplementary-material id="app9"><label>Multimedia Appendix 9</label><p>Model explainability.</p><media xlink:href="jmir_v28i1e80450_app9.pdf" xlink:title="PDF File, 764 KB"/></supplementary-material><supplementary-material id="app10"><label>Multimedia Appendix 10</label><p>Feature parsimony.</p><media xlink:href="jmir_v28i1e80450_app10.pdf" xlink:title="PDF File, 151 KB"/></supplementary-material><supplementary-material id="app11"><label>Multimedia Appendix 11</label><p>Overview of selected features.</p><media xlink:href="jmir_v28i1e80450_app11.pdf" xlink:title="PDF File, 33 KB"/></supplementary-material><supplementary-material id="app12"><label>Multimedia Appendix 12</label><p>Generalization to unknown stressors.</p><media xlink:href="jmir_v28i1e80450_app12.pdf" xlink:title="PDF File, 56 KB"/></supplementary-material><supplementary-material id="app13"><label>Multimedia Appendix 13</label><p>Stratified stressor analysis.</p><media xlink:href="jmir_v28i1e80450_app13.pdf" xlink:title="PDF File, 47 KB"/></supplementary-material><supplementary-material id="app14"><label>Multimedia Appendix 14</label><p>Stratified stressor analysis: single-sensor training.</p><media xlink:href="jmir_v28i1e80450_app14.pdf" xlink:title="PDF File, 49 KB"/></supplementary-material></app-group></back></article>