<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMIR</journal-id>
      <journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id>
      <journal-title>Journal of Medical Internet Research</journal-title>
      <issn pub-type="epub">1438-8871</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v22i10e17738</article-id>
      <article-id pub-id-type="pmid">33112241</article-id>
      <article-id pub-id-type="doi">10.2196/17738</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Developing a Process for the Analysis of User Journeys and the Prediction of Dropout in Digital Health Interventions: Machine Learning Approach</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Eysenbach</surname>
            <given-names>Gunther</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Wolff</surname>
            <given-names>Justus</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Jacobson</surname>
            <given-names>Nicholas</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Wong</surname>
            <given-names>Zoie SY</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Oldenburg</surname>
            <given-names>Jan</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" corresp="yes" equal-contrib="yes">
          <name name-style="western">
            <surname>Bremer</surname>
            <given-names>Vincent</given-names>
          </name>
          <degrees>MA</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Institute of Information Systems</institution>
            <institution>Leuphana University Lüneburg</institution>
            <addr-line>C4.320</addr-line>
            <addr-line>Lüneburg, 21335</addr-line>
            <country>Germany</country>
            <phone>49 41316771157</phone>
            <email>vincent.bremer@leuphana.de</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-0304-5859</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Chow</surname>
            <given-names>Philip I</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-6428-1540</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Funk</surname>
            <given-names>Burkhardt</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-5855-2666</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Thorndike</surname>
            <given-names>Frances P</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-5976-3016</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Ritterband</surname>
            <given-names>Lee M</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-7624-5213</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Institute of Information Systems</institution>
        <institution>Leuphana University Lüneburg</institution>
        <addr-line>Lüneburg</addr-line>
        <country>Germany</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Center for Behavioral Health &#38; Technology</institution>
        <institution>University of Virginia School of Medicine</institution>
        <addr-line>Charlottesville, VA</addr-line>
        <country>United States</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Vincent Bremer <email>vincent.bremer@leuphana.de</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>10</month>
        <year>2020</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>28</day>
        <month>10</month>
        <year>2020</year>
      </pub-date>
      <volume>22</volume>
      <issue>10</issue>
      <elocation-id>e17738</elocation-id>
      <history>
        <date date-type="received">
          <day>9</day>
          <month>1</month>
          <year>2020</year>
        </date>
        <date date-type="rev-request">
          <day>11</day>
          <month>7</month>
          <year>2020</year>
        </date>
        <date date-type="rev-recd">
          <day>3</day>
          <month>9</month>
          <year>2020</year>
        </date>
        <date date-type="accepted">
          <day>20</day>
          <month>9</month>
          <year>2020</year>
        </date>
      </history>
      <copyright-statement>©Vincent Bremer, Philip I Chow, Burkhardt Funk, Frances P Thorndike, Lee M Ritterband. Originally published in the Journal of Medical Internet Research (http://www.jmir.org), 28.10.2020.</copyright-statement>
      <copyright-year>2020</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research, is properly cited. The complete bibliographic information, a link to the original publication on http://www.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="http://www.jmir.org/2020/10/e17738/" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>User dropout is a widespread concern in the delivery and evaluation of digital (ie, web and mobile apps) health interventions. Researchers have yet to fully realize the potential of the large amount of data generated by these technology-based programs. Of particular interest is the ability to predict who will drop out of an intervention. This may be possible through the analysis of user journey data—self-reported as well as system-generated data—produced by the path (or journey) an individual takes to navigate through a digital health intervention.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>The purpose of this study is to provide a step-by-step process for the analysis of user journey data and eventually to predict dropout in the context of digital health interventions. The process is applied to data from an internet-based intervention for insomnia as a way to illustrate its use. The completion of the program is contingent upon completing 7 sequential cores, which include an initial tutorial core. Dropout is defined as not completing the seventh core.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>Steps of user journey analysis, including data transformation, feature engineering, and statistical model analysis and evaluation, are presented. Dropouts were predicted based on data from 151 participants from a fully automated web-based program (Sleep Healthy Using the Internet) that delivers cognitive behavioral therapy for insomnia. Logistic regression with L1 and L2 regularization, support vector machines, and boosted decision trees were used and evaluated based on their predictive performance. Relevant features from the data are reported that predict user dropout.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>Accuracy of predicting dropout (area under the curve [AUC] values) varied depending on the program core and the machine learning technique. After model evaluation, boosted decision trees achieved AUC values ranging between 0.6 and 0.9. Additional handcrafted features, including time to complete certain steps of the intervention, time to get out of bed, and days since the last interaction with the system, contributed to the prediction performance.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>The results support the feasibility and potential of analyzing user journey data to predict dropout. Theory-driven handcrafted features increased the prediction performance. The ability to predict dropout at an individual level could be used to enhance decision making for researchers and clinicians as well as inform dynamic intervention regimens.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>dropout</kwd>
        <kwd>digital health</kwd>
        <kwd>machine learning</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>The efficacy of digital (ie, internet, web, and mobile) behavioral interventions to improve a range of health-related outcomes has been well documented [<xref ref-type="bibr" rid="ref1">1</xref>-<xref ref-type="bibr" rid="ref3">3</xref>]. However, adherence to these interventions is a significant issue [<xref ref-type="bibr" rid="ref4">4</xref>]. Intervention dropout, defined as a participant prematurely discontinuing a program, from internet-based treatments for psychological disorders typically varies between 30% and 50% [<xref ref-type="bibr" rid="ref4">4</xref>-<xref ref-type="bibr" rid="ref6">6</xref>]. However, the reason for such high dropout rates is still unclear [<xref ref-type="bibr" rid="ref5">5</xref>], whereas longer treatment duration and user engagement appear to be associated with improved treatment outcomes and greater effectiveness of the digital intervention [<xref ref-type="bibr" rid="ref7">7</xref>-<xref ref-type="bibr" rid="ref10">10</xref>]. Furthermore, in a research setting, high dropout rates and, consequently, low exposure to digital content might affect the reported effects of a digital intervention and the validity of the results [<xref ref-type="bibr" rid="ref11">11</xref>,<xref ref-type="bibr" rid="ref12">12</xref>]. Although researchers have highlighted the need for a science of user attrition [<xref ref-type="bibr" rid="ref13">13</xref>], there have been few advances in predicting dropout through advanced quantitative approaches in eHealth interventions [<xref ref-type="bibr" rid="ref14">14</xref>]. In particular, previous work has identified hypothetical factors influencing attrition in eHealth programs, such as ease of leaving the intervention, unrealistic expectations on behalf of users, usability and interface issues, and amount of workload required to benefit from an intervention [<xref ref-type="bibr" rid="ref13">13</xref>]. 
Such factors are likely to impact how a user ultimately engages with a program and could provide indicators for predictive factors but do little to advance predictive modeling of dropout when not applied in data-driven studies. Research suggests that an increased completion of modules in digital therapeutics increases treatment outcomes [<xref ref-type="bibr" rid="ref15">15</xref>]. Identifying those patients that are likely to drop out of treatment and addressing the related issues can, thus, improve treatment outcomes and can be the basis of the development of micro interventions that target these high-risk participants to reengage them to complete the program [<xref ref-type="bibr" rid="ref16">16</xref>]. Thus, predicting dropout on a participant level supports the decision making of experts in the target field and consequently leads to more personalized treatment strategies. In addition, inferential results can increase insight into the causes of attrition by revealing data-driven indicators. Participant-specific factors can help to identify individuals who benefit more from digital therapies compared with individuals for whom face-to-face treatment might be a better approach. To evaluate the possibility of predicting dropout in digital interventions and to shed light on some indicators of dropout, the aim of this study is to propose a process for user journey analysis to predict dropout from a digital intervention.</p>
      <p>A wealth of data can be collected through the use of digital interventions. They often feature content that is administered over time as users complete tasks or components of the intervention, typically over several weeks or months [<xref ref-type="bibr" rid="ref17">17</xref>-<xref ref-type="bibr" rid="ref20">20</xref>]. Digital interventions also track and log different types of user interactions (eg, frequency of log-ins). These data provide a nuanced understanding of the usage behavior of participants over the course of an intervention [<xref ref-type="bibr" rid="ref21">21</xref>]. Combined with self-reported data, passively collected user data could be captured and used to provide deeper insight into how likely users are to drop out of an intervention on an individual level and lead to increased prediction performance.</p>
      <p>A user journey is a sequence of interactions as an individual uses a digital intervention (ie, the path an individual takes to navigate through a program). Although user journeys are well known and established in the field of web-based marketing, to the best of our knowledge, their direct application to digital health interventions has not yet been examined. Web-based marketers leverage user journeys to collect information about an individual’s behavior [<xref ref-type="bibr" rid="ref22">22</xref>], often referred to as clickstream data analysis [<xref ref-type="bibr" rid="ref23">23</xref>,<xref ref-type="bibr" rid="ref24">24</xref>]. This increases the understanding of users’ behavior by recognizing patterns in their sequence of actions. Thus, user journey analysis can reveal insight into an individual’s behavior by enabling an analysis of data (eg, Ecological Momentary Assessment [EMA] or log data) that are not frequently used in the eHealth sphere [<xref ref-type="bibr" rid="ref25">25</xref>].</p>
      <p>There are several possible reasons why analysis of user journeys has not achieved prominence in digital health interventions. One obstacle lies in the analysis of large amounts of raw data. Analysis of user journeys often requires transformation of raw data, feature engineering, and the application of machine learning techniques, which can be a burdensome process [<xref ref-type="bibr" rid="ref26">26</xref>] and is not a typical skill set of eHealth behavior researchers. Although user journeys have been used to predict different psychological factors such as mood, stress levels, or treatment outcomes and costs [<xref ref-type="bibr" rid="ref25">25</xref>,<xref ref-type="bibr" rid="ref27">27</xref>-<xref ref-type="bibr" rid="ref31">31</xref>], to our knowledge, no work has provided steps to be taken to analyze raw user journey data and, at the same time, predict user dropout from a digital health intervention.</p>
      <p>The overarching goal of this study is to establish and provide a step-by-step process that describes how to leverage user journeys to predict various behaviors (eg, dropout). This process involves several steps, including creating the basic data structure for handling user journeys, creating features that can add additional information to the existing raw data, and ultimately providing a framework for the statistical analysis. A technical implementation (R package) [<xref ref-type="bibr" rid="ref32">32</xref>,<xref ref-type="bibr" rid="ref33">33</xref>] of this process is provided for the research community. To demonstrate the application and potential utility of this process, we use it to predict user dropout in a randomized controlled trial of a fully automated cognitive behavior therapy intervention for insomnia (Sleep Healthy Using the Internet [SHUTi]) [<xref ref-type="bibr" rid="ref34">34</xref>].</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>User Journey Process</title>
        <p>The overarching steps of the user journey process are outlined in <xref rid="figure1" ref-type="fig">Figure 1</xref>. This process applies machine learning algorithms, specifically supervised learning, which is used when both input (eg, log-ins and mood symptoms) and output data (eg, dropout status) exist in the data set [<xref ref-type="bibr" rid="ref35">35</xref>].</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Process of analysis. AUC: area under the curve; MAE: mean absolute error; ROC: receiver operating characteristic; RMSE: root mean square error.</p>
          </caption>
          <graphic xlink:href="jmir_v22i10e17738_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>It is important for researchers to clearly define the outcome variable of interest. As dependent variables can take on different measurement scales (eg, discrete or continuous), defining the target variable has consequences for the choice of statistical models. When predicting discrete outcomes (ie, consisting of at least two discrete categories or labels), classification is often the appropriate approach. However, when predicting continuous outcome variables, the learning task is regression.</p>
        <sec>
          <title>Step One: Data Transformation</title>
          <p>The first step in analyzing user journey data is to transform the raw data into a wide format, as can be seen in <xref rid="figure2" ref-type="fig">Figure 2</xref>. Thus, the transformed data are structured such that each row corresponds to a unique observation in <italic>Time</italic> for a particular user (<italic>ID</italic>).</p>
          <fig id="figure2" position="float">
            <label>Figure 2</label>
            <caption>
              <p>Example of data transformation in the context of digital health interventions.</p>
            </caption>
            <graphic xlink:href="jmir_v22i10e17738_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
          <p>When transforming the raw data, it is important to specify the time window defining the time interval for which individual touch points are aggregated. The choice of the time window depends on the density of the observations in the raw data. For example, if a raw data set is composed of a few touch points over the course of a day, choosing a time window on a scale of days avoids sparseness of the transformed data matrix. In contrast, when predicting purchases in web-based marketing, for example, a large number of observations exist for each user on short timescales. Here, choosing a small window (eg, an hour) could be beneficial, as the resulting matrix will not be sparse and information loss is minimal. In an internet-based intervention, however, it is not unusual for self-reported data to be collected as little as once a day, with a user logging into the system only a few times a day. In this case, it would not make sense to choose an hour-long window because the resulting matrix would be very sparse. Thus, choosing a time window on a scale of days would be a better choice.</p>
          <p>If multiple observations of the same type occur within a time window, one must decide how to aggregate these values. For some variables, such as diary entries, taking an average may be desirable; for other variables, such as log-ins, the sum is a more appropriate aggregation. The provided technical framework supports the data transformation procedure. In addition, missing values often exist in the data. There are various procedures that can handle missing values. One might remove all rows that include missing values; however, this can lead to a reduction in observations. Other possibilities include imputation procedures such as using aggregated values of these features or developing statistical models that predict the missing values based on other features. For more information on missing values, we refer to the study by Batista and Monard [<xref ref-type="bibr" rid="ref36">36</xref>].</p>
        </sec>
        <sec>
          <title>Step Two: Feature Engineering</title>
          <p>Feature engineering can be described as the process of including additional variables into the data with the intention of achieving increased predictive performance. As statistical learning relies heavily on the input data, this step is important for improving the accuracy of prediction [<xref ref-type="bibr" rid="ref37">37</xref>]. There are 2 approaches to feature engineering: handcrafted or automated. Handcrafted feature engineering is a challenging task and requires human effort and domain knowledge. Therefore, it is appropriate for researchers with expertise in the domain that is represented by the data (eg, sleep) to be highly involved in the process [<xref ref-type="bibr" rid="ref38">38</xref>-<xref ref-type="bibr" rid="ref40">40</xref>]. A clear understanding of the problem to be solved is necessary to derive meaningful features [<xref ref-type="bibr" rid="ref40">40</xref>]. Handcrafted feature engineering often involves a trial and error phase to experiment with different features [<xref ref-type="bibr" rid="ref37">37</xref>]. Automated feature engineering involves the generation of candidate features that are evaluated based on their predictive performance. Tools exist for the application of automated feature engineering in different domains, such as natural language processing or machine vision [<xref ref-type="bibr" rid="ref38">38</xref>,<xref ref-type="bibr" rid="ref41">41</xref>,<xref ref-type="bibr" rid="ref42">42</xref>].</p>
          <p>Interaction terms, that is, the product of 2 original features, can lead to additional knowledge about their relationships and increased predictive accuracy. The provided technical framework supports generating them. In case of a large number of original features, however, including interaction terms results in many additional features.</p>
          <p>In addition, time window–based aggregation methods can be beneficial in terms of predictive performance in the context of digital health interventions [<xref ref-type="bibr" rid="ref31">31</xref>]. Here, based on a user-specified time window <italic>w</italic>, various types of aggregations are performed on the original features. <xref rid="figure3" ref-type="fig">Figure 3</xref> represents the process of this task through the exemplification of self-reported EMA data. The <italic>Mood</italic> level is reported by an individual at different points in time (<italic>Time steps</italic>). For the creation of the aggregated features, a time window of <italic>w</italic>=3 is specified in this example. Various statistical measures, such as the sum (<italic>Mood_sum</italic>), mean (<italic>Mood_mean</italic>), minimum, maximum, and SD (not shown in figure), are calculated for 3 consecutive measurements of the mood level (<italic>w</italic>=3) and included as additional features in the data set. It should be noted that the creation of features can limit one’s ability to reproduce study results if the feature engineering process is not well documented or if the data set changes over time. For the case study in this paper, we created various theory-driven features based on expert knowledge, which will be introduced in <italic>Feature Engineering</italic>.</p>
          <fig id="figure3" position="float">
            <label>Figure 3</label>
            <caption>
              <p>Example of creating aggregated time window–based features for w=3.</p>
            </caption>
            <graphic xlink:href="jmir_v22i10e17738_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
        </sec>
        <sec>
          <title>Step Three: Statistical Analysis and Model Validation</title>
          <p>The next step in analyzing user journey data is the application of machine learning techniques to predict the outcome variable. <xref rid="figure4" ref-type="fig">Figure 4</xref> depicts this procedure. First, the data set can be split into a training set for fitting the data and learning patterns and a test (or holdout) set. This test set is usually created if sufficient data are available. It is subsequently used to test the final model performance of the selected algorithm. It is difficult, however, to quantify <italic>sufficient data</italic> as it depends strongly on the field of research, applied models, and structure of the data.</p>
          <fig id="figure4" position="float">
            <label>Figure 4</label>
            <caption>
              <p>Procedure of statistical analysis.</p>
            </caption>
            <graphic xlink:href="jmir_v22i10e17738_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
          <p>Depending on the task to be analyzed, the data can be further split based on particular points in time. If the aim of the analysis, for example, is the prediction of the outcome of an intervention, it might be useful to evaluate at what point in time the predictive accuracy is at its peak. The longer the time window, the higher the predictive accuracy can be assumed to be, because more data are available. Thus, using time windows and basing the amount of usable data on these windows (<italic>interval cut off</italic>) can be useful in evaluating the feasibility of prediction.</p>
          <p>There are a large number of machine learning techniques that can be applied to user journey data; some models can be applied to both learning tasks (classification or regression), such as support vector machines or decision trees, whereas others fit better for a specific task (ie, logistic regression for classification). Researchers may wish to compare their predictive performance to justify the model selection. Cross-validation is often applied to gauge the predictive performance of a specified model. Here, the data are divided into k chunks, where k-1 chunks are used for training the machine learning techniques and the remaining data chunk is used for predicting the target variable. This procedure is repeated k times until each chunk has been used as a validation set. Ultimately, the model with the best performance is selected for the specified learning task. If a holdout set is maintained, the specified model is then trained based on all data. The target variable in the holdout set is then predicted and evaluated, which leads to the test prediction error.</p>
          <p>Model validation checks the ability of a particular model to either fit the data or predict the outcome variable [<xref ref-type="bibr" rid="ref43">43</xref>]. Eventually, the model with the best performance is selected. Nonvalidation can lead to inaccurate predictions and, thus, overconfidence in the developed model [<xref ref-type="bibr" rid="ref44">44</xref>]. Model validation should generally be executed on the validation set for each iteration of the cross-validation procedure (cross-validated prediction error) to select the best model and, subsequently, on an independent test set that was set aside earlier (test prediction error). In some cases, especially when sufficient data are not available, no independent test set is put aside and only the cross-validated error is reported, which can lead to an optimistic estimation of the error [<xref ref-type="bibr" rid="ref44">44</xref>].</p>
          <p>Deciding on the method of model validation also depends on the learning task. For regression, criteria such as the root mean square error or mean absolute error are often appropriate. For the classification task, confusion matrices and receiver operating characteristic (ROC) graphs are often used as performance indicators. More information about these validation procedures and their application can be found elsewhere [<xref ref-type="bibr" rid="ref45">45</xref>].</p>
          <p>In the provided technical framework, logistic regression, linear regression, support vector machines, boosted decision trees, and regularization techniques are implemented. As overfitting can occur when utilizing a large number of features [<xref ref-type="bibr" rid="ref37">37</xref>] and some types of statistical procedures (eg, linear regression) cannot be applied when the number of features is greater than the number of observations, alternative techniques such as regularization and feature selection may need to be used [<xref ref-type="bibr" rid="ref46">46</xref>]. A thorough review of these techniques is outside the scope of this paper, and readers are strongly encouraged to learn more about each of these techniques and how they pertain to their data and aims.</p>
        </sec>
      </sec>
      <sec>
        <title>Case Study</title>
        <p>To illustrate the user journey analysis process, data were extracted from a trial of a web-based program (SHUTi) [<xref ref-type="bibr" rid="ref47">47</xref>]. SHUTi is a fully automated web-delivered program that is tailored to individual users [<xref ref-type="bibr" rid="ref47">47</xref>] and informed by the model for internet interventions [<xref ref-type="bibr" rid="ref17">17</xref>]. SHUTi is based on the primary principles of face-to-face cognitive behavioral therapy for insomnia (CBT-I), including sleep restriction, stimulus control, cognitive restructuring, sleep hygiene, and relapse prevention. SHUTi contains 7 <italic>cores</italic> that are dispensed over time, the first core being a tutorial on how to use the program, with new cores becoming available 7 days after completion of a previous core. This format was meant to mirror traditional CBT-I delivery procedures using a weekly session format. SHUTi has been found to be more efficacious than web-based patient education in changing primary sleep outcomes (insomnia severity, sleep onset latency [SOL], and wake after sleep onset [WASO]), with the majority of SHUTi users achieving insomnia remission status 1 year later [<xref ref-type="bibr" rid="ref48">48</xref>]. A mobile app version of SHUTi, Somryst, with equivalent content and mechanisms of action was recently cleared by the Food and Drug Administration as the first prescription digital therapeutic for treating patients with chronic insomnia. Thus, the efficacy of SHUTi is well established. However, similar to other digital interventions, predicting user dropout is an important yet unaddressed issue. Thus, the primary aim of this case study is to demonstrate the feasibility of predicting user dropout from data generated by a digital health intervention.</p>
        <p>The sample for this study was drawn from a trial consisting of 303 participants (218/303, 71.9% female) aged between 21 and 65 years (mean 43.3 years, SD 11.6). They were 83.8% (254/303) White, 6.9% (21/303) Black, 4.0% (12/303) Asian, and 5.3% (16/303) <italic>other</italic>. Participants were randomly assigned (using a random number generator) to receive SHUTi or web-based patient education (control condition). The study was approved by the local university’s institutional review board, and the project was registered on clinicaltrials.gov (NCT01438697). Inclusionary and exclusionary criteria as well as outcomes are reported in detail elsewhere [<xref ref-type="bibr" rid="ref48">48</xref>].</p>
        <p>Data from 151 participants who were assigned to SHUTi were used in this study. Both self-reported and system-generated types of data are available. Participants completed a battery of self-report measures at baseline and post intervention. A list and detailed description of the measures have been published previously [<xref ref-type="bibr" rid="ref48">48</xref>]. Sleep diaries were also collected throughout the intervention period, along with information about bedtime, length of sleep onset, number and duration of awakenings, perceived sleep quality, and rising time. Data were collected prospectively for 10 days (during a 2-week period) at each of the 4 assessment periods (pre- and postintervention and 6- and 12-month follow-ups). Sleep diary questions mirrored those from the consensus sleep diary [<xref ref-type="bibr" rid="ref49">49</xref>]. Values for SOL and WASO were averaged across the 10 days of diary collection at each assessment period. The system-generated data included individual log-ins and automated emails sent by the system as well as trigger events logged in the system. All data were used to predict user dropout, defined as not completing all 7 SHUTi cores (core 0 through core 6). Thus, users were classified as having dropped out or not. As noted elsewhere [<xref ref-type="bibr" rid="ref48">48</xref>], 60.3% (91/151) of participants completed all 7 cores in the SHUTi program.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <p>The primary aim was to predict whether users prematurely dropped out of SHUTi (dropped out by core 6/completed core 6). Therefore, the learning problem is a binary classification (drop out/did not drop out). To verify the point at which the machine learning techniques were capable of predicting dropout, separate analyses were executed after the completion of each core (<xref rid="figure5" ref-type="fig">Figure 5</xref>) and only included data up to the core in question. The number of participants included in each analysis was 146, 141, 133, 116, 102, and 101 for cores 0 to 5, respectively.</p>
      <fig id="figure5" position="float">
        <label>Figure 5</label>
        <caption>
          <p>Setup of analysis for dropout prediction.</p>
        </caption>
        <graphic xlink:href="jmir_v22i10e17738_fig5.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
      </fig>
      <sec>
        <title>Data Transformation</title>
        <p>As a first step, the raw data were transformed into a rectangular data matrix (wide format), which led to 981 basic features. Basic features are those features that were already included in the raw data. As an example, see column <italic>Type</italic> in <xref rid="figure2" ref-type="fig">Figure 2</xref>. In addition, 25 handcrafted and theory-driven features that were derived from the raw data were implemented. These features are introduced in the next section <italic>Feature Engineering</italic>. In total, 1006 features were used for the analyses. Whenever the same question (ie, in the case of diary data) was administered multiple times a day, the mean of the reported values was chosen for numeric data and the mode for categorical data. To reduce the sparseness of the resulting data matrix, reported values for questionnaires such as the Insomnia Severity Index were repeated for each participant until the next occurrence of the questionnaire (this questionnaire was administered before each core). To address the issue of missing data, features were deleted based on the quantity of missing data. To evaluate how the deletion affects the predictive performance of the models, features were deleted that contained more than 5%, 10%, 15%, and 20% of missing values. This procedure reduced the number of features tremendously. In addition, categorical variables that had only one level or category were removed. Less data are available for the analysis at time point core 0 compared with time point core 5. Thus, the number of features for each level of missing data was 83, 263, 299, and 401, respectively.</p>
        <p>As the aim of this study was to predict dropout at core 6, each participant only had exactly one outcome value—they could either complete core 6 or not. Users who dropped out between cores 1 and 5 would be classified as having dropped out at core 6. Therefore, the user journey data must be aggregated for each user. For most of the variables, the mean and mode were used as the aggregation method. However, for some variables, such as log-in information or number of days since the last contact, the sum is more appropriate. <xref ref-type="table" rid="table1">Table 1</xref> illustrates the different aggregation procedures and the corresponding features. Features that are not listed were aggregated by mean and mode. The rest of the missing data were imputed using the median for numeric variables and mode for categorical features. In addition, an imputation based on the k-nearest neighbor (KNN) algorithm was applied (k=5). Both approaches were used to reveal which of them led to a better prediction performance.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Aggregation of theory-determined features.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="340"/>
            <col width="380"/>
            <col width="280"/>
            <thead>
              <tr valign="top">
                <td>Feature aggregation method</td>
                <td>Handcrafted features</td>
                <td>Existing clinically important features</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Sum: The sum of all observations of a specific feature for an individual</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Days since the last contact (any interaction)</p>
                    </list-item>
                    <list-item>
                      <p>If sleeping duration is decreasing from core to core</p>
                    </list-item>
                    <list-item>
                      <p>If sleep window duration is 5 or 8 hours</p>
                    </list-item>
                  </list>
                </td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>If the participant had an alcoholic drink that day</p>
                    </list-item>
                    <list-item>
                      <p>If the participant took a nap</p>
                    </list-item>
                    <list-item>
                      <p>If the system recorded a triggered event that day</p>
                    </list-item>
                    <list-item>
                      <p>If the participant logged in that day</p>
                    </list-item>
                    <list-item>
                      <p>If the system sent an email that day</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>Last: The last observation of a specific feature for an individual</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Difference between preferred arising time in core 2 and core 3</p>
                    </list-item>
                    <list-item>
                      <p>If preferred arising time is greater than 8 AM in core 2</p>
                    </list-item>
                    <list-item>
                      <p>Average time in days to complete a core among all cores that have been available</p>
                    </list-item>
                    <list-item>
                      <p>Time needed in days to complete a core (6 features for cores 0-5)</p>
                    </list-item>
                  </list>
                </td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>If the participant finished homework in core 2</p>
                    </list-item>
                    <list-item>
                      <p>Number of days where no diaries have been completed in the period of analysis</p>
                    </list-item>
                    <list-item>
                      <p>Precipitating factor includes <italic>major life event</italic> or <italic>health/psychological</italic></p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>Mean: Mean of the observations of a specific feature for an individual</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Difference between awake and arise time</p>
                    </list-item>
                    <list-item>
                      <p>Difference between preferred arise time and actual arise time (AM/PM)</p>
                    </list-item>
                    <list-item>
                      <p>Difference between preferred arise time and actual arise time (minutes)</p>
                    </list-item>
                    <list-item>
                      <p>Difference between preferred bedtime and actual bedtime</p>
                    </list-item>
                  </list>
                </td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Naptime in minutes</p>
                    </list-item>
                  </list>
                </td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
      <sec>
        <title>Feature Engineering</title>
        <p>A total of 25 theory-driven features were implemented for this case study. Some of these features, shown in <xref ref-type="table" rid="table1">Table 1</xref>, were handcrafted and some were already existing in the data set. Specifically, the handcrafted features were computed from the raw data and were deemed useful for model prediction. A few of these features are study-specific (eg, <italic>if the participant finished homework in core 2</italic>), whereas others could be used in any type of digital intervention (eg, <italic>if the participant logged in</italic>). As the number of features generated from the study data was already large, none of the generic feature generation methods were used. These 25 features were not deleted based on the missing value ratio (mentioned above) because there was a clinical or theory-driven rationale that they would influence prediction performance.</p>
      </sec>
      <sec>
        <title>Statistical Analysis and Model Validation</title>
        <p>For the learning task, a set of machine learning techniques was used to select the model with the best prediction performance. Specifically, support vector machines, boosted decision trees, and logistic regression with L1 and L2 regularization were applied. The optimal parameters were determined using a grid-based search and cross-validation. In addition, stratified 10-fold cross-validation was used for each analysis. To choose an appropriate statistical model, a heat map was created to illustrate the average area under the curve (AUC) across all core analyses for each model, imputation procedure, and threshold for percentage of missing values (<xref rid="figure6" ref-type="fig">Figure 6</xref>). As can be seen, the method of imputing the missing values did not have a strong influence on the performance of the applied statistical model. Increasing the percentage threshold negatively influenced L1 regularization and the support vector machine, whereas L2 regularization and boosted decision trees seemed not to be influenced tremendously. The best average AUC value (0.719) was achieved by applying boosted decision trees, deleting each feature that contained more than 15% of missing values, and imputing the rest of the missing values by KNN.</p>
        <fig id="figure6" position="float">
          <label>Figure 6</label>
          <caption>
            <p>Heat map of average area under the curve values across core analyses for each model, imputation procedure, and threshold for percentage of missing values. AUC: area under the curve; KNN: k-nearest neighbor; LASSO: least absolute shrinkage and selection operator; SVM: support vector machine.</p>
          </caption>
          <graphic xlink:href="jmir_v22i10e17738_fig6.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p><xref rid="figure7" ref-type="fig">Figure 7</xref> illustrates the ROC curves for each core analysis using the specified parameters. With the exception of core 4, the AUC values increased with each analysis. For each core, the predictions were better than random, indicated by AUC values above 0.5. Generally, the AUC values ranged between 0.6 and 0.9. Importantly, the prediction of dropout appears feasible early in the intervention period (ie, core 1 and core 2). In addition, the area under the precision-recall curve (PRAUC) was computed. Across all core analyses, a PRAUC of 0.48 was observed, whereas chance had an average of 0.24. Thus, the model performs better than chance.</p>
        <fig id="figure7" position="float">
          <label>Figure 7</label>
          <caption>
            <p>Receiver operating characteristic for each core analysis based on boosted decision trees (15% missing value deletion, k-nearest neighbor imputation). AUC: area under the curve; FPR: false-positive rate; TPR: true-positive rate.</p>
          </caption>
          <graphic xlink:href="jmir_v22i10e17738_fig7.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>Boosted decision trees were used to identify important features. Here, SHapley Additive exPlanation (SHAP) values were used [<xref ref-type="bibr" rid="ref50">50</xref>]. SHAP values are a relatively new concept in the field of machine learning and essentially represent the importance of each feature and their contribution to the prediction by comparing the prediction of the model with and without a specified feature value depending on the order of their introduction to the model. In addition to the importance of each feature, SHAP values quantify how features contribute to the prediction of the model.</p>
        <p><xref rid="figure8" ref-type="fig">Figures 8</xref>-<xref rid="figure13" ref-type="fig">13</xref> include the 5 most important features according to the boosted decision trees for each core analysis. In each graph, the x-axis represents the values for each feature and the y-axis represents the SHAP values (ie, the effect each feature has on predicting the completion of core 6 of the intervention). In the core 0 analysis, for example, finishing core 0 within 3 days (x-axis) has a positive influence on the predicted completion of core 6, as can be seen from the SHAP values above zero on the y-axis. However, taking more time to complete core 0 (where the x-axis value is greater than 3) influences this prediction negatively, as the graph approaches values under zero.</p>
        <fig id="figure8" position="float">
          <label>Figure 8</label>
          <caption>
            <p>Five most important features for each core analysis according to boosted decision trees (15% deletion of missing values, k-nearest neighbor imputation, and Core 0 analysis). The x-axis represents the values for each feature, and the y-axis represents the SHAP values. SHAP: SHapley Additive exPlanation; SOL: sleep onset latency; WASO: wake after sleep onset.</p>
          </caption>
          <graphic xlink:href="jmir_v22i10e17738_fig8.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure9" position="float">
          <label>Figure 9</label>
          <caption>
            <p>Five most important features for each core analysis according to boosted decision trees (15% deletion of missing values, KNN imputation, and Core 1 analysis). SHAP: SHapley Additive exPlanation; WASO: wake after sleep onset.</p>
          </caption>
          <graphic xlink:href="jmir_v22i10e17738_fig9.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure10" position="float">
          <label>Figure 10</label>
          <caption>
            <p>Five most important features for each core analysis according to boosted decision trees (15% deletion of missing values, KNN imputation, and Core 2 analysis). SHAP: SHapley Additive exPlanation.</p>
          </caption>
          <graphic xlink:href="jmir_v22i10e17738_fig10.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure11" position="float">
          <label>Figure 11</label>
          <caption>
            <p>Five most important features for each core analysis according to boosted decision trees (15% deletion of missing values, KNN imputation, and Core 3 analysis). SHAP: SHapley Additive exPlanation.</p>
          </caption>
          <graphic xlink:href="jmir_v22i10e17738_fig11.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure12" position="float">
          <label>Figure 12</label>
          <caption>
            <p>Five most important features for each core analysis according to boosted decision trees (15% deletion of missing values, KNN imputation, and Core 4 analysis). SHAP: SHapley Additive exPlanation.</p>
          </caption>
          <graphic xlink:href="jmir_v22i10e17738_fig12.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure13" position="float">
          <label>Figure 13</label>
          <caption>
            <p>Five most important features for each core analysis according to boosted decision trees (15% deletion of missing values, KNN imputation, and Core 5 analysis). SHAP: SHapley Additive exPlanation.</p>
          </caption>
          <graphic xlink:href="jmir_v22i10e17738_fig13.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>In general, 7 out of the strongest 22 features were handcrafted and theory driven. <xref ref-type="table" rid="table2">Table 2</xref> summarizes all the features. Taking more time to complete the cores appeared to influence dropout. The time to complete core 0 predicted whether a participant eventually dropped out (core 0 and core 1 analysis). In addition, usual arise time and the time needed to get out of bed (from awake to arise) affected the prediction of dropout early on in the intervention. Participants who got up earlier than 4:30 AM or later than 6:45 AM, and participants who needed less than 9 min or more than 66 min to get up, negatively influenced the prediction of completing core 6 of the intervention (x-axis of the feature usual arise time and time to get up for core 0). Furthermore, a greater WASO also appeared to influence the prediction of dropout status. These variables could, therefore, be an early indicator of dropout in this particular intervention.</p>
        <p>In addition, if triggers were logged for more than 18 days or participants received emails for more than 30 days, dropping out was more likely (core 3 analysis). Furthermore, if there was no interaction between the system and the participants for more than 67 days, the individuals were more likely to drop out.</p>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Summary of the unique top 5 most important features across analyses.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="220"/>
            <col width="420"/>
            <col width="60"/>
            <col width="60"/>
            <col width="60"/>
            <col width="60"/>
            <col width="60"/>
            <col width="60"/>
            <thead>
              <tr valign="top">
                <td colspan="2">Predictors</td>
                <td colspan="6">Analysis at each point in time</td>
              </tr>
              <tr valign="top">
                <td>Feature</td>
                <td>Description</td>
                <td>Core 0</td>
                <td>Core 1</td>
                <td>Core 2</td>
                <td>Core 3</td>
                <td>Core 4</td>
                <td>Core 5</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Core 0 completion date—intervention start date<sup>a</sup></td>
                <td>Time to complete core 0 in days</td>
                <td>+<sup>b</sup></td>
                <td>+</td>
                <td>N/A<sup>c</sup></td>
                <td>N/A</td>
                <td>N/A</td>
                <td>N/A</td>
              </tr>
              <tr valign="top">
                <td>Arise time—awake time<sup>a</sup></td>
                <td>Difference between time of awakening and getting out of bed in minutes (time to get up)</td>
                <td>+</td>
                <td>N/A</td>
                <td>N/A</td>
                <td>N/A</td>
                <td>N/A</td>
                <td>N/A</td>
              </tr>
              <tr valign="top">
                <td>Usual arise time</td>
                <td>Retrospective report specified from baseline data</td>
                <td>+</td>
                <td>N/A</td>
                <td>N/A</td>
                <td>N/A</td>
                <td>N/A</td>
                <td>N/A</td>
              </tr>
              <tr valign="top">
                <td>Wake after sleep onset</td>
                <td>Minutes awake in the middle of the night from sleep diaries</td>
                <td>+</td>
                <td>+</td>
                <td>N/A</td>
                <td>N/A</td>
                <td>N/A</td>
                <td>N/A</td>
              </tr>
              <tr valign="top">
                <td>Sleep onset latency</td>
                <td>Minutes to fall asleep from sleep diaries</td>
                <td>+</td>
                <td>N/A</td>
                <td>N/A</td>
                <td>N/A</td>
                <td>N/A</td>
                <td>N/A</td>
              </tr>
              <tr valign="top">
                <td>Baseline arise time (pre retro sleep arising time)</td>
                <td>Time the user specified that they got out of bed from baseline data</td>
                <td>N/A</td>
                <td>+</td>
                <td>+</td>
                <td>N/A</td>
                <td>N/A</td>
                <td>N/A</td>
              </tr>
              <tr valign="top">
                <td>Pre retro sleep waking early</td>
                <td>User indicates having problems waking up too early in the morning</td>
                <td>N/A</td>
                <td>+</td>
                <td>N/A</td>
                <td>N/A</td>
                <td>N/A</td>
                <td>N/A</td>
              </tr>
              <tr valign="top">
                <td>Pre teach trust info source c</td>
                <td>How much the user trusts health information</td>
                <td>N/A</td>
                <td>+</td>
                <td>N/A</td>
                <td>N/A</td>
                <td>N/A</td>
                <td>N/A</td>
              </tr>
              <tr valign="top">
                <td>Average time to complete core<sup>a</sup></td>
                <td>Average time to complete a core among all cores that have been available up to the point of the analysis</td>
                <td>N/A</td>
                <td>N/A</td>
                <td>+</td>
                <td>+</td>
                <td>+</td>
                <td>+</td>
              </tr>
              <tr valign="top">
                <td>Pre stpi 24 dep<sup>d,e</sup></td>
                <td>How low the user feels at baseline</td>
                <td>N/A</td>
                <td>N/A</td>
                <td>+</td>
                <td>N/A</td>
                <td>N/A</td>
                <td>N/A</td>
              </tr>
              <tr valign="top">
                <td>Pre se gen 3<sup>f</sup></td>
                <td>How well the user feels things have been going</td>
                <td>N/A</td>
                <td>N/A</td>
                <td>+</td>
                <td>N/A</td>
                <td>N/A</td>
                <td>N/A</td>
              </tr>
              <tr valign="top">
                <td>Bedtime</td>
                <td>If a participant went to bed in the AM or PM (before or after 12 AM)</td>
                <td>N/A</td>
                <td>N/A</td>
                <td>+</td>
                <td>N/A</td>
                <td>N/A</td>
                <td>N/A</td>
              </tr>
              <tr valign="top">
                <td>Email sent<sup>a</sup></td>
                <td>If the system sent an email that day</td>
                <td>N/A</td>
                <td>N/A</td>
                <td>N/A</td>
                <td>+</td>
                <td>N/A</td>
                <td>N/A</td>
              </tr>
              <tr valign="top">
                <td>Pre stpi 26 cur<sup>g</sup></td>
                <td>How stimulated the user feels at baseline</td>
                <td>N/A</td>
                <td>N/A</td>
                <td>N/A</td>
                <td>+</td>
                <td>+</td>
                <td>N/A</td>
              </tr>
              <tr valign="top">
                <td>Trigger event logged<sup>a</sup></td>
                <td>If the system logged a trigger event that day</td>
                <td>N/A</td>
                <td>N/A</td>
                <td>N/A</td>
                <td>+</td>
                <td>N/A</td>
                <td>N/A</td>
              </tr>
              <tr valign="top">
                <td>Pre teach stress 6</td>
                <td>User feels he or she can solve most problems if necessary effort is put in</td>
                <td>N/A</td>
                <td>N/A</td>
                <td>N/A</td>
                <td>+</td>
                <td>N/A</td>
                <td>N/A</td>
              </tr>
              <tr valign="top">
                <td>Pre stpi 18 cur<sup>h</sup></td>
                <td>How eager the user feels at baseline</td>
                <td>N/A</td>
                <td>N/A</td>
                <td>N/A</td>
                <td>N/A</td>
                <td>+</td>
                <td>N/A</td>
              </tr>
              <tr valign="top">
                <td>Core 4 completion date—core 4 start date<sup>a</sup></td>
                <td>Time to complete core 4 in days</td>
                <td>N/A</td>
                <td>N/A</td>
                <td>N/A</td>
                <td>N/A</td>
                <td>+</td>
                <td>+</td>
              </tr>
              <tr valign="top">
                <td>Pre stpi 29 anx<sup>i</sup></td>
                <td>How much self-confidence the user feels at baseline</td>
                <td>N/A</td>
                <td>N/A</td>
                <td>N/A</td>
                <td>N/A</td>
                <td>+</td>
                <td>N/A</td>
              </tr>
              <tr valign="top">
                <td>Days since the last information<sup>a</sup></td>
                <td>Days since the last contact (any interaction)</td>
                <td>N/A</td>
                <td>N/A</td>
                <td>N/A</td>
                <td>N/A</td>
                <td>N/A</td>
                <td>+</td>
              </tr>
              <tr valign="top">
                <td>Pre CESD<sup>j</sup> 14<sup>k</sup></td>
                <td>How lonely the user feels at baseline</td>
                <td>N/A</td>
                <td>N/A</td>
                <td>N/A</td>
                <td>N/A</td>
                <td>N/A</td>
                <td>+</td>
              </tr>
              <tr valign="top">
                <td>Pre retro sleep length of sleep prob</td>
                <td>Number of months the user reports having had sleep difficulties at baseline</td>
                <td>N/A</td>
                <td>N/A</td>
                <td>N/A</td>
                <td>N/A</td>
                <td>N/A</td>
                <td>+</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table2fn1">
              <p><sup>a</sup>Handcrafted/theory-driven features.</p>
            </fn>
            <fn id="table2fn2">
              <p><sup>b</sup>+ indicates appearance of feature in corresponding core analysis.</p>
            </fn>
            <fn id="table2fn3">
              <p><sup>c</sup>N/A: not applicable.</p>
            </fn>
            <fn id="table2fn4">
              <p><sup>d</sup>STPI: state-trait personality inventory.</p>
            </fn>
            <fn id="table2fn5">
              <p><sup>e</sup>Pre stpi 24 dep: baseline STPI measure item #24 depression subscale.</p>
            </fn>
            <fn id="table2fn6">
              <p><sup>f</sup>Pre se gen 3: baseline Perceived Stress Scale item #5.</p>
            </fn>
            <fn id="table2fn7">
              <p><sup>g</sup>Pre stpi 26 cur: baseline STPI measure item #26 curiosity subscale.</p>
            </fn>
            <fn id="table2fn8">
              <p><sup>h</sup>Pre stpi 18 cur: baseline STPI measure item #18 curiosity subscale.</p>
            </fn>
            <fn id="table2fn9">
              <p><sup>i</sup>Pre stpi 29 anx: baseline STPI measure item #29 anxiety subscale.</p>
            </fn>
            <fn id="table2fn10">
              <p><sup>j</sup>CESD: Center for Epidemiologic Studies Depression Scale.</p>
            </fn>
            <fn id="table2fn11">
              <p><sup>k</sup>Pre CESD 14: baseline CESD measure item #14.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>Considering the increasing use of digital health interventions and the tremendous amount of data gathered in such interventions, a variety of methods can be used for the analysis of various data types and structures. In this study, a process for the analysis of user journey data in this context was proposed, and a step-by-step guide and technical framework for the analysis as an R package was provided. Challenges of data analysis based on user journeys, such as data transformation, feature engineering, and statistical model application and evaluation, were discussed. The analysis of user journeys can be a powerful tool for the prediction of various factors on an individual participant level. Here, it has been applied to real-world data to predict dropout from an internet-based intervention.</p>
        <p>The application of the proposed process and evaluation of statistical models indicated the feasibility of dropout prediction by using this process. AUC values ranged between 0.6 and 0.9 for the selected machine learning algorithm (boosted decision trees). Most importantly, it was shown that the prediction of user dropout was possible early in the intervention, which could be helpful to clinicians and policy makers as treatment decisions are made and adjusted. In addition, this study indicated the importance of expert knowledge and subsequent implementation of handcrafted features. Not all existing statistical models necessarily require handcrafted features because automated feature engineering can already provide crucial insight; however, handcrafted features can increase prediction performance and lead to increased interpretability. In this study, handcrafted features appeared to be among the most important features according to the boosted decision trees, perhaps given the more nuanced understanding necessary for treating insomnia. It is important to keep in mind, though, that the analysis presented here was meant as a demonstration of the power of this approach. A much larger data set is needed to draw more firm and generalizable conclusions.</p>
        <p>With this caveat, a number of interesting results emerged related to features and their impact on dropout prediction. For example, as participants took longer to complete earlier steps of the intervention, they were less likely to complete the final step of the intervention. Thus, a discussion about how users can be motivated to complete early steps in the intervention may be very beneficial. In addition, the findings suggest that the time at which participants got out of bed in the morning and how much time they actually needed to get up might be important factors for completing the sleep intervention. Participants who got out of bed between 4:30 AM and 6:45 AM and did not need more than 66 min to get out of bed were more likely to complete the final step of the intervention. In addition, trigger events might only have a positive effect in the short term, as the occurrence of triggers on more than 18 days appeared to increase the likelihood of dropping out. However, it is possible that this finding only accounts for participants who would not have completed the final step of the intervention. Assuming this, these participants were, therefore, not influenced by trigger events. It is also important to emphasize that these results are based on a bottom-up, data-driven learning approach. Therefore, it is up to researchers to interpret the results and cross-validate them in other samples. Predictions in this context based on user journey data and the resulting knowledge about factors that influence these predictions, especially on an individual level, could lead to the implementation of strategies that seek to improve the utilization and efficacy of digital health interventions.</p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>There are a number of limitations of this study that should be considered when interpreting the results. One limitation is the relatively small number of participants included in the analysis and the large feature space. The predictive performance of the applied models is satisfactory, especially early on in the intervention. The process and models described in this study are technically feasible, although the reliability of the ensuing results may be impacted by limitations to sample size. Owing to the limited number of participants, the results of this study should be replicated in a larger sample. Furthermore, the amount of missing values impacts the analyses and can lead to bias. Obtaining more complete data can further increase the interpretability and predictive accuracy of the models. Apart from time window–based features and time-dependent variables, the demonstrated steps and this study in general do not include time-dependent feature engineering, such as the relation between features and observations across time. Researchers should examine the data set they are planning to analyze to determine whether time-dynamic features could be used in their projects. Another limitation is the fact that the data are heterogeneous at an individual participant level; thus, the application of models that consider heterogeneous parameters might provide deeper and more individualized information about the participants. However, considering the number of participants in the data, heterogeneous models have not yet been investigated. The results are, nevertheless, promising and can lead to increased knowledge about users and how dropout from digital health interventions is affected by various factors. Studies using larger data sets are necessary to improve model performance and confirm findings.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>This study proposes a step-by-step process for the analysis of user journey data in the context of digital health interventions and provides a technical framework. Furthermore, the proposed framework was applied to data from an internet-based intervention for insomnia to predict dropout of participants. These participants needed to complete 7 cores to finish the program. Importantly, our process was able to predict user dropout at each core better than chance. The predictive performance also varied by core; although the AUC was approximately 0.6 for cores 0 and 1, it was noticeably higher for the later cores. This indicates that the user journey process can be used to predict dropout early in the intervention and that prediction accuracy increases over the course of the intervention. This may allow researchers to preemptively address dropout before it occurs by providing support to users who may be struggling to engage. Among the machine learning techniques we evaluated, boosted decision trees provided the greatest accuracy when features that contained more than 15% missing values were deleted. In addition, a varying set of features was revealed that contributed to the prediction performance of dropout in this context. Replicating the results of this study in a larger sample is needed to further validate the process outlined in this paper. Researchers may also wish to develop methods that predict the likelihood of user dropout over the duration of an intervention, which could enable researchers to devote resources to those at the highest risk of dropping out.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group/>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">AUC</term>
          <def>
            <p>area under the curve</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">CBT-I</term>
          <def>
            <p>cognitive behavioral therapy for insomnia</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">EMA</term>
          <def>
            <p>Ecological Momentary Assessment</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">KNN</term>
          <def>
            <p>k-nearest neighbor</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">PRAUC</term>
          <def>
            <p>area under the precision-recall curve</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">ROC</term>
          <def>
            <p>receiver operating characteristic</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">SHAP</term>
          <def>
            <p>SHapley Additive exPlanation</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">SHUTi</term>
          <def>
            <p>Sleep Healthy Using the Internet</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">SOL</term>
          <def>
            <p>sleep onset latency</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb10">WASO</term>
          <def>
            <p>wake after sleep onset</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>This study was supported by grant R01 MH86758 from the National Institute of Mental Health. The funding source had no role in the design and conduct of the study; collection, management, analysis, and interpretation of the data; preparation, review, or approval of the manuscript; and decision to submit the manuscript for publication. The authors thank Christina Frederick, BS, for her help with the study administration tasks. The authors specially thank Gabe D Heath, BA, and Steve P Johnson, BA, developers of the SHUTi intervention, for extracting and making all the data readily available for analysis.</p>
    </ack>
    <fn-group>
      <fn fn-type="conflict">
        <p>FT and LR report having a financial and/or business interest in BeHealth Solutions and Pear Therapeutics, 2 companies that develop and disseminate digital therapeutics, including by licensing the therapeutic developed, based in part, on early versions of the software utilized in research reported in the enclosed paper. These companies had no role in preparing this manuscript. LR is also a consultant to Mahana Therapeutics, a separate digital therapeutic company not affiliated with this research. Some of the research in this paper was conducted while FT was a faculty member at the University of Virginia. At that time for FT, and ongoing for LR, the terms of these arrangements have been reviewed and approved by the University of Virginia in accordance with its policies.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Saddichha</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Al-Desouki</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Lamia</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Linden</surname>
              <given-names>IA</given-names>
            </name>
            <name name-style="western">
              <surname>Krausz</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Online interventions for depression and anxiety - a systematic review</article-title>
          <source>Health Psychol Behav Med</source>
          <year>2014</year>
          <month>01</month>
          <day>1</day>
          <volume>2</volume>
          <issue>1</issue>
          <fpage>841</fpage>
          <lpage>81</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/25750823"/>
          </comment>
          <pub-id pub-id-type="doi">10.1080/21642850.2014.945934</pub-id>
          <pub-id pub-id-type="medline">25750823</pub-id>
          <pub-id pub-id-type="pii">945934</pub-id>
          <pub-id pub-id-type="pmcid">PMC4346073</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Carlbring</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Andersson</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Cuijpers</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Riper</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Hedman-Lagerlöf</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Internet-based vs. face-to-face cognitive behavior therapy for psychiatric and somatic disorders: an updated systematic review and meta-analysis</article-title>
          <source>Cogn Behav Ther</source>
          <year>2018</year>
          <month>01</month>
          <volume>47</volume>
          <issue>1</issue>
          <fpage>1</fpage>
          <lpage>18</lpage>
          <pub-id pub-id-type="doi">10.1080/16506073.2017.1401115</pub-id>
          <pub-id pub-id-type="medline">29215315</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Erbe</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Eichert</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Riper</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Ebert</surname>
              <given-names>DD</given-names>
            </name>
          </person-group>
          <article-title>Blending face-to-face and internet-based interventions for the treatment of mental disorders in adults: systematic review</article-title>
          <source>J Med Internet Res</source>
          <year>2017</year>
          <month>09</month>
          <day>15</day>
          <volume>19</volume>
          <issue>9</issue>
          <fpage>e306</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2017/9/e306/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/jmir.6588</pub-id>
          <pub-id pub-id-type="medline">28916506</pub-id>
          <pub-id pub-id-type="pii">v19i9e306</pub-id>
          <pub-id pub-id-type="pmcid">PMC5622288</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Melville</surname>
              <given-names>KM</given-names>
            </name>
            <name name-style="western">
              <surname>Casey</surname>
              <given-names>LM</given-names>
            </name>
            <name name-style="western">
              <surname>Kavanagh</surname>
              <given-names>DJ</given-names>
            </name>
          </person-group>
          <article-title>Dropout from internet-based treatment for psychological disorders</article-title>
          <source>Br J Clin Psychol</source>
          <year>2010</year>
          <month>11</month>
          <volume>49</volume>
          <issue>Pt 4</issue>
          <fpage>455</fpage>
          <lpage>71</lpage>
          <pub-id pub-id-type="doi">10.1348/014466509X472138</pub-id>
          <pub-id pub-id-type="medline">19799804</pub-id>
          <pub-id pub-id-type="pii">bjcp840</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Torous</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Lipschitz</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Ng</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Firth</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Dropout rates in clinical trials of smartphone apps for depressive symptoms: a systematic review and meta-analysis</article-title>
          <source>J Affect Disord</source>
          <year>2020</year>
          <month>02</month>
          <day>15</day>
          <volume>263</volume>
          <fpage>413</fpage>
          <lpage>9</lpage>
          <pub-id pub-id-type="doi">10.1016/j.jad.2019.11.167</pub-id>
          <pub-id pub-id-type="medline">31969272</pub-id>
          <pub-id pub-id-type="pii">S0165-0327(19)32606-0</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Horsch</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Lancee</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Beun</surname>
              <given-names>RJ</given-names>
            </name>
            <name name-style="western">
              <surname>Neerincx</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Brinkman</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>Adherence to technology-mediated insomnia treatment: a meta-analysis, interviews, and focus groups</article-title>
          <source>J Med Internet Res</source>
          <year>2015</year>
          <month>09</month>
          <day>4</day>
          <volume>17</volume>
          <issue>9</issue>
          <fpage>e214</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2015/9/e214/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/jmir.4115</pub-id>
          <pub-id pub-id-type="medline">26341671</pub-id>
          <pub-id pub-id-type="pii">v17i9e214</pub-id>
          <pub-id pub-id-type="pmcid">PMC4642391</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wickwire</surname>
              <given-names>EM</given-names>
            </name>
          </person-group>
          <article-title>The value of digital insomnia therapeutics: what we know and what we need to know</article-title>
          <source>J Clin Sleep Med</source>
          <year>2019</year>
          <month>01</month>
          <day>15</day>
          <volume>15</volume>
          <issue>1</issue>
          <fpage>11</fpage>
          <lpage>3</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.5664/jcsm.7558"/>
          </comment>
          <pub-id pub-id-type="doi">10.5664/jcsm.7558</pub-id>
          <pub-id pub-id-type="medline">30621849</pub-id>
          <pub-id pub-id-type="pii">jc-18-00766</pub-id>
          <pub-id pub-id-type="pmcid">PMC6329555</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Vandelanotte</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Spathonis</surname>
              <given-names>KM</given-names>
            </name>
            <name name-style="western">
              <surname>Eakin</surname>
              <given-names>EG</given-names>
            </name>
            <name name-style="western">
              <surname>Owen</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>Website-delivered physical activity interventions: a review of the literature</article-title>
          <source>Am J Prev Med</source>
          <year>2007</year>
          <month>07</month>
          <volume>33</volume>
          <issue>1</issue>
          <fpage>54</fpage>
          <lpage>64</lpage>
          <pub-id pub-id-type="doi">10.1016/j.amepre.2007.02.041</pub-id>
          <pub-id pub-id-type="medline">17572313</pub-id>
          <pub-id pub-id-type="pii">S0749-3797(07)00164-X</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Funk</surname>
              <given-names>KL</given-names>
            </name>
            <name name-style="western">
              <surname>Stevens</surname>
              <given-names>VJ</given-names>
            </name>
            <name name-style="western">
              <surname>Appel</surname>
              <given-names>LJ</given-names>
            </name>
            <name name-style="western">
              <surname>Bauck</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Brantley</surname>
              <given-names>PJ</given-names>
            </name>
            <name name-style="western">
              <surname>Champagne</surname>
              <given-names>CM</given-names>
            </name>
            <name name-style="western">
              <surname>Coughlin</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Dalcin</surname>
              <given-names>AT</given-names>
            </name>
            <name name-style="western">
              <surname>Harvey-Berino</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Hollis</surname>
              <given-names>JF</given-names>
            </name>
            <name name-style="western">
              <surname>Jerome</surname>
              <given-names>GJ</given-names>
            </name>
            <name name-style="western">
              <surname>Kennedy</surname>
              <given-names>BM</given-names>
            </name>
            <name name-style="western">
              <surname>Lien</surname>
              <given-names>LF</given-names>
            </name>
            <name name-style="western">
              <surname>Myers</surname>
              <given-names>VH</given-names>
            </name>
            <name name-style="western">
              <surname>Samuel-Hodge</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Svetkey</surname>
              <given-names>LP</given-names>
            </name>
            <name name-style="western">
              <surname>Vollmer</surname>
              <given-names>WM</given-names>
            </name>
          </person-group>
          <article-title>Associations of internet website use with weight change in a long-term weight loss maintenance program</article-title>
          <source>J Med Internet Res</source>
          <year>2010</year>
          <month>07</month>
          <day>27</day>
          <volume>12</volume>
          <issue>3</issue>
          <fpage>e29</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2010/3/e29/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/jmir.1504</pub-id>
          <pub-id pub-id-type="medline">20663751</pub-id>
          <pub-id pub-id-type="pii">v12i3e29</pub-id>
          <pub-id pub-id-type="pmcid">PMC2956327</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Alkhaldi</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Hamilton</surname>
              <given-names>FL</given-names>
            </name>
            <name name-style="western">
              <surname>Lau</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Webster</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Michie</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Murray</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>The effectiveness of technology-based strategies to promote engagement with digital interventions: a systematic review protocol</article-title>
          <source>JMIR Res Protoc</source>
          <year>2015</year>
          <month>04</month>
          <day>28</day>
          <volume>4</volume>
          <issue>2</issue>
          <fpage>e47</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.researchprotocols.org/2015/2/e47/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/resprot.3990</pub-id>
          <pub-id pub-id-type="medline">25921274</pub-id>
          <pub-id pub-id-type="pii">v4i2e47</pub-id>
          <pub-id pub-id-type="pmcid">PMC4429223</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Brouwer</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Kroeze</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Crutzen</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>de Nooijer</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>de Vries</surname>
              <given-names>NK</given-names>
            </name>
            <name name-style="western">
              <surname>Brug</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Oenema</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Which intervention characteristics are related to more exposure to internet-delivered healthy lifestyle promotion interventions: a systematic review</article-title>
          <source>J Med Internet Res</source>
          <year>2011</year>
          <month>01</month>
          <day>6</day>
          <volume>13</volume>
          <issue>1</issue>
          <fpage>e2</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2011/1/e2/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/jmir.1639</pub-id>
          <pub-id pub-id-type="medline">21212045</pub-id>
          <pub-id pub-id-type="pii">v13i1e2</pub-id>
          <pub-id pub-id-type="pmcid">PMC3221341</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Geraghty</surname>
              <given-names>AW</given-names>
            </name>
            <name name-style="western">
              <surname>Wood</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Hyland</surname>
              <given-names>ME</given-names>
            </name>
          </person-group>
          <article-title>Attrition from self-directed interventions: investigating the relationship between psychological predictors, intervention content and dropout from a body dissatisfaction intervention</article-title>
          <source>Soc Sci Med</source>
          <year>2010</year>
          <month>07</month>
          <volume>71</volume>
          <issue>1</issue>
          <fpage>30</fpage>
          <lpage>7</lpage>
          <pub-id pub-id-type="doi">10.1016/j.socscimed.2010.03.007</pub-id>
          <pub-id pub-id-type="medline">20400220</pub-id>
          <pub-id pub-id-type="pii">S0277-9536(10)00232-7</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Eysenbach</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>The law of attrition</article-title>
          <source>J Med Internet Res</source>
          <year>2005</year>
          <month>03</month>
          <day>31</day>
          <volume>7</volume>
          <issue>1</issue>
          <fpage>e11</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2005/1/e11/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/jmir.7.1.e11</pub-id>
          <pub-id pub-id-type="medline">15829473</pub-id>
          <pub-id pub-id-type="pii">v7e11</pub-id>
          <pub-id pub-id-type="pmcid">PMC1550631</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pedersen</surname>
              <given-names>DH</given-names>
            </name>
            <name name-style="western">
              <surname>Mansourvar</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Sortsø</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Schmidt</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Predicting dropouts from an electronic health platform for lifestyle interventions: analysis of methods and predictors</article-title>
          <source>J Med Internet Res</source>
          <year>2019</year>
          <month>09</month>
          <day>4</day>
          <volume>21</volume>
          <issue>9</issue>
          <fpage>e13617</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2019/9/e13617/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/13617</pub-id>
          <pub-id pub-id-type="medline">31486409</pub-id>
          <pub-id pub-id-type="pii">v21i9e13617</pub-id>
          <pub-id pub-id-type="pmcid">PMC6753691</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Donkin</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Christensen</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Naismith</surname>
              <given-names>SL</given-names>
            </name>
            <name name-style="western">
              <surname>Neal</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Hickie</surname>
              <given-names>IB</given-names>
            </name>
            <name name-style="western">
              <surname>Glozier</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>A systematic review of the impact of adherence on the effectiveness of e-therapies</article-title>
          <source>J Med Internet Res</source>
          <year>2011</year>
          <month>08</month>
          <day>5</day>
          <volume>13</volume>
          <issue>3</issue>
          <fpage>e52</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2011/3/e52/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/jmir.1772</pub-id>
          <pub-id pub-id-type="medline">21821503</pub-id>
          <pub-id pub-id-type="pii">v13i3e52</pub-id>
          <pub-id pub-id-type="pmcid">PMC3222162</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fernández-Álvarez</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Díaz-García</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>González-Robles</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Baños</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>García-Palacios</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Botella</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Dropping out of a transdiagnostic online intervention: a qualitative analysis of client's experiences</article-title>
          <source>Internet Interv</source>
          <year>2017</year>
          <month>12</month>
          <volume>10</volume>
          <fpage>29</fpage>
          <lpage>38</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S2214-7829(17)30052-0"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.invent.2017.09.001</pub-id>
          <pub-id pub-id-type="medline">30135750</pub-id>
          <pub-id pub-id-type="pii">S2214-7829(17)30052-0</pub-id>
          <pub-id pub-id-type="pmcid">PMC6084825</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ritterband</surname>
              <given-names>LM</given-names>
            </name>
            <name name-style="western">
              <surname>Thorndike</surname>
              <given-names>FP</given-names>
            </name>
            <name name-style="western">
              <surname>Cox</surname>
              <given-names>DJ</given-names>
            </name>
            <name name-style="western">
              <surname>Kovatchev</surname>
              <given-names>BP</given-names>
            </name>
            <name name-style="western">
              <surname>Gonder-Frederick</surname>
              <given-names>LA</given-names>
            </name>
          </person-group>
          <article-title>A behavior change model for internet interventions</article-title>
          <source>Ann Behav Med</source>
          <year>2009</year>
          <month>08</month>
          <volume>38</volume>
          <issue>1</issue>
          <fpage>18</fpage>
          <lpage>27</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/19802647"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s12160-009-9133-4</pub-id>
          <pub-id pub-id-type="medline">19802647</pub-id>
          <pub-id pub-id-type="pmcid">PMC2878721</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Christensen</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Batterham</surname>
              <given-names>PJ</given-names>
            </name>
            <name name-style="western">
              <surname>Gosling</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Ritterband</surname>
              <given-names>LM</given-names>
            </name>
            <name name-style="western">
              <surname>Griffiths</surname>
              <given-names>KM</given-names>
            </name>
            <name name-style="western">
              <surname>Thorndike</surname>
              <given-names>FP</given-names>
            </name>
            <name name-style="western">
              <surname>Glozier</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>O'Dea</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Hickie</surname>
              <given-names>IB</given-names>
            </name>
            <name name-style="western">
              <surname>Mackinnon</surname>
              <given-names>AJ</given-names>
            </name>
          </person-group>
          <article-title>Effectiveness of an online insomnia program (SHUTi) for prevention of depressive episodes (the GoodNight Study): a randomised controlled trial</article-title>
          <source>Lancet Psychiatry</source>
          <year>2016</year>
          <month>04</month>
          <volume>3</volume>
          <issue>4</issue>
          <fpage>333</fpage>
          <lpage>41</lpage>
          <pub-id pub-id-type="doi">10.1016/S2215-0366(15)00536-2</pub-id>
          <pub-id pub-id-type="medline">26827250</pub-id>
          <pub-id pub-id-type="pii">S2215-0366(15)00536-2</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ritterband</surname>
              <given-names>LM</given-names>
            </name>
            <name name-style="western">
              <surname>Thorndike</surname>
              <given-names>FP</given-names>
            </name>
            <name name-style="western">
              <surname>Gonder-Frederick</surname>
              <given-names>LA</given-names>
            </name>
            <name name-style="western">
              <surname>Magee</surname>
              <given-names>JC</given-names>
            </name>
            <name name-style="western">
              <surname>Bailey</surname>
              <given-names>ET</given-names>
            </name>
            <name name-style="western">
              <surname>Saylor</surname>
              <given-names>DK</given-names>
            </name>
            <name name-style="western">
              <surname>Morin</surname>
              <given-names>CM</given-names>
            </name>
          </person-group>
          <article-title>Efficacy of an internet-based behavioral intervention for adults with insomnia</article-title>
          <source>Arch Gen Psychiatry</source>
          <year>2009</year>
          <month>07</month>
          <volume>66</volume>
          <issue>7</issue>
          <fpage>692</fpage>
          <lpage>8</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/19581560"/>
          </comment>
          <pub-id pub-id-type="doi">10.1001/archgenpsychiatry.2009.66</pub-id>
          <pub-id pub-id-type="medline">19581560</pub-id>
          <pub-id pub-id-type="pii">66/7/692</pub-id>
          <pub-id pub-id-type="pmcid">PMC3723339</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Murray</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Hekler</surname>
              <given-names>EB</given-names>
            </name>
            <name name-style="western">
              <surname>Andersson</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Collins</surname>
              <given-names>LM</given-names>
            </name>
            <name name-style="western">
              <surname>Doherty</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Hollis</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Rivera</surname>
              <given-names>DE</given-names>
            </name>
            <name name-style="western">
              <surname>West</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Wyatt</surname>
              <given-names>JC</given-names>
            </name>
          </person-group>
          <article-title>Evaluating digital health interventions: key questions and approaches</article-title>
          <source>Am J Prev Med</source>
          <year>2016</year>
          <month>11</month>
          <volume>51</volume>
          <issue>5</issue>
          <fpage>843</fpage>
          <lpage>51</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/27745684"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.amepre.2016.06.008</pub-id>
          <pub-id pub-id-type="medline">27745684</pub-id>
          <pub-id pub-id-type="pii">S0749-3797(16)30229-X</pub-id>
          <pub-id pub-id-type="pmcid">PMC5324832</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Iida</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Shrout</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Laurenceau</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Bolger</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <source>Using Diary Methods in Psychological Research</source>
          <year>2012</year>
          <publisher-loc>Washington, DC</publisher-loc>
          <publisher-name>American Psychological Association</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nottorf</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Mastel</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Funk</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>The user-journey in online search - an empirical study of the generic-to-branded spillover effect based on user-level data</article-title>
          <source>DCNET, ICE-B and OPTICS</source>
          <year>2012</year>
          <conf-name>DIO'12</conf-name>
          <conf-date>July 24-27, 2012</conf-date>
          <conf-loc>Rome, Italy</conf-loc>
          <fpage>145</fpage>
          <lpage>54</lpage>
          <pub-id pub-id-type="doi">10.5220/0004052101450154</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chatterjee</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Hoffman</surname>
              <given-names>DL</given-names>
            </name>
            <name name-style="western">
              <surname>Novak</surname>
              <given-names>TP</given-names>
            </name>
          </person-group>
          <article-title>Modeling the Clickstream: Implications for Web-Based Advertising Efforts</article-title>
          <source>Mark Sci</source>
          <year>2003</year>
          <volume>22</volume>
          <issue>4</issue>
          <fpage>520</fpage>
          <lpage>41</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://pdfs.semanticscholar.org/91f1/cb80189cc3fc7349aff99bb2d69477298bb0.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Stange</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Funk</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>How Much Tracking Is Necessary - The Learning Curve in Bayesian User Journey Analysis</article-title>
          <source>European Conference on Information Systems</source>
          <year>2015</year>
          <conf-name>ECIS'15</conf-name>
          <conf-date>November 29, 2015</conf-date>
          <conf-loc>Münster, Germany</conf-loc>
          <pub-id pub-id-type="doi">10.18151/7217484</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>van Breda</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Pastor</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Hoogendoorn</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Ruwaard</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Asselbergs</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Riper</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Exploring and Comparing Machine Learning Approaches for Predicting Mood Over Time</article-title>
          <source>KES Conference on Innovation in Medicine and Healthcare</source>
          <year>2016</year>
          <conf-name>IMH'16</conf-name>
          <conf-date>June, 2016</conf-date>
          <conf-loc>Tenerife, Spain</conf-loc>
          <fpage>37</fpage>
          <pub-id pub-id-type="doi">10.1007/978-3-319-39687-3_4</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sen</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Dacin</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Pattichis</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Current Trends in Web Data Analysis</article-title>
          <source>ACM Digital Library</source>
          <year>2006</year>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://dl.acm.org/citation.cfm?id=1167842">http://dl.acm.org/citation.cfm?id=1167842</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jaques</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Rudovic</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Taylor</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Sano</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Picard</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Predicting Tomorrow’s Mood, Health, and Stress Level using Personalized Multitask Learning and Domain Adaptation</article-title>
          <source>Proceedings of Machine Learning Research</source>
          <year>2017</year>
          <volume>66</volume>
          <fpage>17</fpage>
          <lpage>33</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://proceedings.mlr.press/v66/jaques17a.html"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Becker</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Bremer</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Funk</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Asselbergs</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Riper</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Ruwaard</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>How to Predict Mood: Delving into Features of Smartphone-Based Data</article-title>
          <source>Americas Conference on Information Systems</source>
          <year>2016</year>
          <conf-name>AMCIS'16</conf-name>
          <conf-date>September 1, 2016</conf-date>
          <conf-loc>San Diego, USA</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bremer</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Becker</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Kolovos</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Funk</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>van Breda</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Hoogendoorn</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Riper</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Predicting therapy success and costs for personalized treatment recommendations using baseline characteristics: data-driven analysis</article-title>
          <source>J Med Internet Res</source>
          <year>2018</year>
          <month>08</month>
          <day>21</day>
          <volume>20</volume>
          <issue>8</issue>
          <fpage>e10275</fpage>
          <pub-id pub-id-type="doi">10.2196/10275</pub-id>
          <pub-id pub-id-type="medline">30131318</pub-id>
          <pub-id pub-id-type="pii">v20i8e10275</pub-id>
          <pub-id pub-id-type="pmcid">PMC6123535</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>van Breda</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Bremer</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Becker</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Hoogendoorn</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Funk</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Ruwaard</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Riper</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Predicting therapy success for treatment as usual and blended treatment in the domain of depression</article-title>
          <source>Internet Interv</source>
          <year>2018</year>
          <volume>12</volume>
          <fpage>100</fpage>
          <lpage>104</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S2214-7829(17)30075-1"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.invent.2017.08.003</pub-id>
          <pub-id pub-id-type="medline">29862165</pub-id>
          <pub-id pub-id-type="pii">S2214-7829(17)30075-1</pub-id>
          <pub-id pub-id-type="pmcid">PMC5945603</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>van Breda</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Hoogendoorn</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Eiben</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Andersson</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Riper</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Ruwaard</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Vernmark</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>A Feature Representation Learning Method for Temporal Datasets</article-title>
          <source>2016 IEEE Symposium Series on Computational Intelligence</source>
          <year>2016</year>
          <conf-name>SSCI'16</conf-name>
          <conf-date>December 6-9, 2016</conf-date>
          <conf-loc>Athens, Greece</conf-loc>
          <fpage>1</fpage>
          <lpage>8</lpage>
          <pub-id pub-id-type="doi">10.1109/ssci.2016.7849890</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="web">
          <article-title>A Language and Environment for Statistical Computing</article-title>
          <source>R Core Team</source>
          <year>2018</year>
          <access-date>2020-01-01</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.r-project.org/">https://www.r-project.org/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bremer</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>UJ-Analysis</article-title>
          <source>Github Repos</source>
          <access-date>2020-01-01</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://github.com/VBremer/UJ-Analysis">https://github.com/VBremer/UJ-Analysis</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gosling</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Glozier</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Griffiths</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Ritterband</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Thorndike</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Mackinnon</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Hehir</surname>
              <given-names>KK</given-names>
            </name>
            <name name-style="western">
              <surname>Bennett</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Bennett</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Christensen</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>The GoodNight study—online CBT for insomnia for the indicated prevention of depression: study protocol for a randomised controlled trial</article-title>
          <source>Trials</source>
          <year>2014</year>
          <month>02</month>
          <day>13</day>
          <volume>15</volume>
          <fpage>56</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://trialsjournal.biomedcentral.com/articles/10.1186/1745-6215-15-56"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/1745-6215-15-56</pub-id>
          <pub-id pub-id-type="pii">1745-6215-15-56</pub-id>
          <pub-id pub-id-type="pmcid">PMC3926259</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kotsiantis</surname>
              <given-names>SB</given-names>
            </name>
          </person-group>
          <article-title>Supervised Machine Learning: A Review of Classification Techniques</article-title>
          <source>Informatica</source>
          <year>2007</year>
          <volume>31</volume>
          <issue>3</issue>
          <fpage>249</fpage>
          <lpage>268</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Batista</surname>
              <given-names>GEAPA</given-names>
            </name>
            <name name-style="western">
              <surname>Monard</surname>
              <given-names>MC</given-names>
            </name>
          </person-group>
          <article-title>An analysis of four missing data treatment methods for supervised learning</article-title>
          <source>Applied Artificial Intelligence</source>
          <year>2003</year>
          <month>05</month>
          <volume>17</volume>
          <issue>5-6</issue>
          <fpage>519</fpage>
          <lpage>533</lpage>
          <pub-id pub-id-type="doi">10.1080/713827181</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Domingos</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>A few useful things to know about machine learning</article-title>
          <source>Commun ACM</source>
          <year>2012</year>
          <volume>55</volume>
          <issue>10</issue>
          <fpage>78</fpage>
          <lpage>87</lpage>
          <pub-id pub-id-type="doi">10.1145/2347736.2347755</pub-id>
          <pub-id pub-id-type="medline">1000183096</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kanter</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Veeramachaneni</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Deep Feature Synthesis: Towards Automating Data Science Endeavors</article-title>
          <source>IEEE International Conference on Data Science and Advanced Analytics</source>
          <year>2015</year>
          <conf-name>DSAA'15</conf-name>
          <conf-date>October 19-21, 2015</conf-date>
          <conf-loc>Paris, France</conf-loc>
          <fpage>1</fpage>
          <lpage>10</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Khurana</surname>
              <given-names>U</given-names>
            </name>
            <name name-style="western">
              <surname>Nargesian</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Samulowitz</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Khalil</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Turaga</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Automating Feature Engineering</article-title>
          <source>NIPS workshop</source>
          <year>2016</year>
          <conf-name>NIPS'16</conf-name>
          <conf-date>December 5-10, 2016</conf-date>
          <conf-loc>Barcelona, Spain</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lam</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Thiebaut</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Sinn</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Mai</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Alkan</surname>
              <given-names>O</given-names>
            </name>
          </person-group>
          <article-title>One button machine for automating feature engineering in relational databases</article-title>
          <source>arXiv</source>
          <year>2017</year>
          <access-date>2018-06-10</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1706.00327">https://arxiv.org/abs/1706.00327</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cheng</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Kasneci</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Graepel</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Stern</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Herbrich</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Automated Feature Generation From Structured Knowledge</article-title>
          <source>Conference on Information and Knowledge Management</source>
          <year>2011</year>
          <conf-name>CIKM'11</conf-name>
          <conf-date>October 24-28, 2011</conf-date>
          <conf-loc>Glasgow, Scotland, UK</conf-loc>
          <fpage>1395</fpage>
          <lpage>1404</lpage>
          <pub-id pub-id-type="doi">10.1145/2063576.2063779</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Jin</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>RAPID: Rating Pictorial Aesthetics using Deep Learning</article-title>
          <source>Proceedings of the ACM International Conference on Multimedia</source>
          <year>2014</year>
          <conf-name>MM'14</conf-name>
          <conf-date>November 3-7, 2014</conf-date>
          <conf-loc>Orlando, Florida, USA</conf-loc>
          <fpage>457</fpage>
          <lpage>466</lpage>
          <pub-id pub-id-type="doi">10.1145/2647868.2654927</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Marcus</surname>
              <given-names>AH</given-names>
            </name>
            <name name-style="western">
              <surname>Elias</surname>
              <given-names>RW</given-names>
            </name>
          </person-group>
          <article-title>Some useful statistical methods for model validation</article-title>
          <source>Environ Health Perspect</source>
          <year>1998</year>
          <volume>106</volume>
          <fpage>1541</fpage>
          <lpage>1550</lpage>
          <pub-id pub-id-type="doi">10.1289/ehp.98106s61541</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref44">
        <label>44</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Arboretti</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Salmaso</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Model performance analysis and model validation in logistic regression</article-title>
          <source>Statistica</source>
          <year>2003</year>
          <volume>63</volume>
          <issue>2</issue>
          <fpage>375</fpage>
          <lpage>396</lpage>
          <pub-id pub-id-type="doi">10.6092/issn.1973-2201/358</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref45">
        <label>45</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fawcett</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>An introduction to ROC analysis</article-title>
          <source>Pattern Recognition Letters</source>
          <year>2006</year>
          <month>06</month>
          <volume>27</volume>
          <issue>8</issue>
          <fpage>861</fpage>
          <lpage>874</lpage>
          <pub-id pub-id-type="doi">10.1016/j.patrec.2005.10.010</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref46">
        <label>46</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tibshirani</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Regression Shrinkage and Selection Via the Lasso</article-title>
          <source>Journal of the Royal Statistical Society: Series B (Methodological)</source>
          <!-- Originally published in 1996 (vol 58, issue 1); 2018-12-05 is only the publisher's online-archive date. -->
          <year>1996</year>
          <volume>58</volume>
          <issue>1</issue>
          <fpage>267</fpage>
          <lpage>288</lpage>
          <pub-id pub-id-type="doi">10.1111/j.2517-6161.1996.tb02080.x</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref47">
        <label>47</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Thorndike</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Saylor</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Bailey</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Gonder-Frederick</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Morin</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Ritterband</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Development and Perceived Utility and Impact of an Internet Intervention for Insomnia</article-title>
          <source>E J Appl Psychol</source>
          <year>2008</year>
          <month>12</month>
          <day>23</day>
          <volume>4</volume>
          <issue>2</issue>
          <fpage>32</fpage>
          <lpage>42</lpage>
          <pub-id pub-id-type="doi">10.7790/ejap.v4i2.133</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref48">
        <label>48</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ritterband</surname>
              <given-names>LM</given-names>
            </name>
            <name name-style="western">
              <surname>Thorndike</surname>
              <given-names>FP</given-names>
            </name>
            <name name-style="western">
              <surname>Ingersoll</surname>
              <given-names>KS</given-names>
            </name>
            <name name-style="western">
              <surname>Lord</surname>
              <given-names>HR</given-names>
            </name>
            <name name-style="western">
              <surname>Gonder-Frederick</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Frederick</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Quigg</surname>
              <given-names>MS</given-names>
            </name>
            <name name-style="western">
              <surname>Cohn</surname>
              <given-names>WF</given-names>
            </name>
            <name name-style="western">
              <surname>Morin</surname>
              <given-names>CM</given-names>
            </name>
          </person-group>
          <article-title>Effect of a Web-Based Cognitive Behavior Therapy for Insomnia Intervention With 1-Year Follow-up: A Randomized Clinical Trial</article-title>
          <source>JAMA Psychiatry</source>
          <year>2017</year>
          <month>01</month>
          <day>01</day>
          <volume>74</volume>
          <issue>1</issue>
          <fpage>68</fpage>
          <lpage>75</lpage>
          <pub-id pub-id-type="doi">10.1001/jamapsychiatry.2016.3249</pub-id>
          <pub-id pub-id-type="pii">2589161</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref49">
        <label>49</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Carney</surname>
              <given-names>CE</given-names>
            </name>
            <name name-style="western">
              <surname>Buysse</surname>
              <given-names>DJ</given-names>
            </name>
            <name name-style="western">
              <surname>Ancoli-Israel</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Edinger</surname>
              <given-names>JD</given-names>
            </name>
            <name name-style="western">
              <surname>Krystal</surname>
              <given-names>AD</given-names>
            </name>
            <name name-style="western">
              <surname>Lichstein</surname>
              <given-names>KL</given-names>
            </name>
            <name name-style="western">
              <surname>Morin</surname>
              <given-names>CM</given-names>
            </name>
          </person-group>
          <article-title>The consensus sleep diary: standardizing prospective sleep self-monitoring</article-title>
          <source>Sleep</source>
          <year>2012</year>
          <month>02</month>
          <day>01</day>
          <volume>35</volume>
          <issue>2</issue>
          <fpage>287</fpage>
          <lpage>302</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.doi.org/10.5665/sleep.1642"/>
          </comment>
          <pub-id pub-id-type="doi">10.5665/sleep.1642</pub-id>
          <pub-id pub-id-type="medline">22294820</pub-id>
          <pub-id pub-id-type="pmcid">PMC3250369</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref50">
        <label>50</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lundberg</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>A Unified Approach to Interpreting Model Predictions</article-title>
          <source>Neural Information Processing Systems</source>
          <year>2017</year>
          <conf-name>NIPS'17</conf-name>
          <conf-date>December 4-9, 2017</conf-date>
          <conf-loc>Long Beach, USA</conf-loc>
          <fpage>4765</fpage>
          <lpage>4774</lpage>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
