<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMIR</journal-id>
      <journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id>
      <journal-title>Journal of Medical Internet Research</journal-title>
      <issn pub-type="epub">1438-8871</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v21i9e13617</article-id>
      <article-id pub-id-type="pmid">31486409</article-id>
      <article-id pub-id-type="doi">10.2196/13617</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Predicting Dropouts From an Electronic Health Platform for Lifestyle Interventions: Analysis of Methods and Predictors</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Eysenbach</surname>
            <given-names>Gunther</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>de Vries</surname>
            <given-names>Herman</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Miyoshi</surname>
            <given-names>Newton</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Kamalakannan</surname>
            <given-names>Sridharan</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Pedersen</surname>
            <given-names>Daniel Hansen</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Liva Healthcare A/S</institution>
            <addr-line>Danneskiold-Samsøes Allé 41</addr-line>
            <addr-line>Copenhagen, 1434</addr-line>
            <country>Denmark</country>
            <phone>45 70 20 88 22</phone>
            <email>dhp@livahealthcare.com</email>
          </address>
          <ext-link ext-link-type="orcid">http://orcid.org/0000-0002-5450-6125</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Mansourvar</surname>
            <given-names>Marjan</given-names>
          </name>
          <degrees>MSc, PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">http://orcid.org/0000-0001-6492-7858</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Sortsø</surname>
            <given-names>Camilla</given-names>
          </name>
          <degrees>MSc, PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">http://orcid.org/0000-0002-6831-954X</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Schmidt</surname>
            <given-names>Thomas</given-names>
          </name>
          <degrees>MSc, PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">http://orcid.org/0000-0003-4476-8559</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Liva Healthcare A/S</institution>
        <addr-line>Copenhagen</addr-line>
        <country>Denmark</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Centre of Health Informatics and Technology</institution>
        <institution>The Maersk Mc-Kinney Moller Institute</institution>
        <institution>University of Southern Denmark</institution>
        <addr-line>Odense</addr-line>
        <country>Denmark</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Daniel Hansen Pedersen 
        <email>dhp@livahealthcare.com</email></corresp>
      </author-notes>
      <pub-date pub-type="collection"><month>09</month><year>2019</year></pub-date>
      <pub-date pub-type="epub">
        <day>04</day>
        <month>09</month>
        <year>2019</year>
      </pub-date>
      <volume>21</volume>
      <issue>9</issue>
      <elocation-id>e13617</elocation-id>
      <history>
        <date date-type="received">
          <day>5</day>
          <month>2</month>
          <year>2019</year>
        </date>
        <date date-type="rev-request">
          <day>29</day>
          <month>4</month>
          <year>2019</year>
        </date>
        <date date-type="rev-recd">
          <day>24</day>
          <month>6</month>
          <year>2019</year>
        </date>
        <date date-type="accepted">
          <day>7</day>
          <month>7</month>
          <year>2019</year>
        </date>
      </history>
      <copyright-statement>©Daniel Hansen Pedersen, Marjan Mansourvar, Camilla Sortsø, Thomas Schmidt. Originally published in the Journal of Medical Internet Research (http://www.jmir.org), 04.09.2019.</copyright-statement>
      <copyright-year>2019</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research, is properly cited. The complete bibliographic information, a link to the original publication on http://www.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://www.jmir.org/2019/9/e13617/" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>The increasing prevalence and economic impact of chronic diseases challenge health care systems globally. Digital solutions can potentially improve efficiency and quality of care, but these initiatives struggle with nonusage attrition. Machine learning methods have been proven to predict dropouts in other settings but lack implementation in health care.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>This study aimed to gain insight into the causes of attrition for patients in an electronic health (eHealth) intervention for chronic lifestyle diseases and evaluate if attrition can be predicted and consequently prevented. We aimed to build predictive models that can identify patients in a digital lifestyle intervention at high risk of dropout by analyzing several predictor variables applied in different models and to further assess the possibilities and impact of implementing such models into an eHealth platform.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>Data from 2684 patients using an eHealth platform were iteratively analyzed using logistic regression, decision trees, and random forest models. The dataset was split into a 79.99% (2147/2684) training and cross-validation set and a 20.0% (537/2684) holdout test set. Trends in activity patterns were analyzed to assess engagement over time. Development and implementation were performed iteratively with health coaches.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>Patients in the test dataset were classified as dropouts with an 89% precision using a random forest model and 11 predictor variables. The most significant predictors were the provider of the intervention, 2 weeks inactivity, and the number of advices received from the health coach. Engagement in the platform dropped significantly leading up to the time of dropout.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>Dropouts from eHealth lifestyle interventions can be predicted using various data mining methods. This can support health coaches in preventing attrition by receiving proactive warnings. The best performing predictive model was found to be the random forest.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>eHealth</kwd>
        <kwd>patient dropouts</kwd>
        <kwd>adherence</kwd>
        <kwd>law of attrition</kwd>
        <kwd>digital health</kwd>
        <kwd>chronic disease</kwd>
        <kwd>data mining</kwd>
        <kwd>logistic regression</kwd>
        <kwd>decision trees</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <sec>
        <title>Background</title>
        <p>Chronic diseases such as diabetes, heart disease, chronic obstructive pulmonary disease, and cancer are collectively responsible for more than two-thirds of all deaths and 75% of the health care budget spending in Europe [<xref ref-type="bibr" rid="ref1">1</xref>]. The increasing prevalence and enormous economic impact of chronic diseases are a critical threat to health care systems. This necessitates new treatments that can effectively handle more people at a lower resource-to-outcome ratio. The application of mobile computing and communication technology in health care (denoted as electronic health [eHealth]) has introduced new possibilities in terms of improving efficiency and quality of care [<xref ref-type="bibr" rid="ref2">2</xref>]. Despite several studies showing promising results in terms of outcomes such as weight loss [<xref ref-type="bibr" rid="ref3">3</xref>] and behavior change [<xref ref-type="bibr" rid="ref4">4</xref>], the evidence for long-term effectiveness, and especially how to retain patients in digital interventions, remains limited [<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref7">7</xref>].</p>
        <p>In any eHealth program, adherence is a key challenge, as a substantial proportion of patients stop using the application and thus drop out of the intervention program before its completion, referred to as <italic>nonusage attrition</italic>, or simply a <italic>dropout</italic> [<xref ref-type="bibr" rid="ref8">8</xref>]. Recently, a dropout rate of 72% was reported in an eHealth intervention for adults with type 2 diabetes [<xref ref-type="bibr" rid="ref9">9</xref>], and more generally, dropout rates as high as 83% are reported [<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref11">11</xref>]. Studies have sought to identify predictors of dropout, but a consistent set of predictors has not yet been identified [<xref ref-type="bibr" rid="ref12">12</xref>]. Previous studies have found engagement and participation in an online forum [<xref ref-type="bibr" rid="ref13">13</xref>], depressive mood [<xref ref-type="bibr" rid="ref14">14</xref>], age, gender, vocational education and employment status [<xref ref-type="bibr" rid="ref15">15</xref>], disease severity, treatment length, and chronicity [<xref ref-type="bibr" rid="ref11">11</xref>] to be related to attrition. Prediction of dropouts has been evaluated in multiple studies, many of which were set in an educational institution setting where high dropout rates are also a great concern. Survival analysis [<xref ref-type="bibr" rid="ref16">16</xref>], logistic regression, random forest, and other machine learning algorithms [<xref ref-type="bibr" rid="ref17">17</xref>] are commonly applied to address this problem, using demographics and other characteristics to predict dropout. The documented high attrition rates from eHealth interventions make it an attractive case to apply similar methods to predict patients at high risk of dropping out. Furthermore, the literature on data mining and predictive methods in relation to attrition in eHealth settings is very limited, suggesting a lack of implementation and integration of these methods in the eHealth domain.</p>
      </sec>
      <sec>
        <title>Objectives</title>
        <p>The aim of our study was to assess the variables and methods for predicting dropouts of patients with chronic diseases in a digital lifestyle intervention and review their applicability for implementation in an eHealth platform. We utilized self-reported data including patient-reported outcome measures (PROMs) submitted by chronic lifestyle disease patients in an eHealth intervention provided by the Liva Healthcare (LIVA) platform (<xref ref-type="boxed-text" rid="box1">Textbox 1</xref>). To assess the research question of <italic>how self-reported data can be applied to address the challenge of attrition in an eHealth setting</italic>, we evaluated the factors associated with dropout and applied logistic regression, decision trees, and random forest. We proposed how these models can be implemented to visualize the results as predictive warnings to reduce dropouts. In this way, the data are used to improve the eHealth intervention by supporting health professionals and enabling them to re-engage patients at high risk of attrition. As defined previously, we applied a broad definition of eHealth given the scope of the intervention under study that is targeting a wide range of patients. However, the challenge of attrition is relevant for most eHealth interventions for lifestyle change.</p>
        <boxed-text id="box1" position="float">
          <title>Short description of the LIVA platform and intervention.</title>
          <p>LIVA is a digital platform designed to facilitate lifestyle changes for patients with chronic diseases. The platform is used by Danish municipalities. Patients have an initial goal-setting meeting with their coach and are introduced to the LIVA app that allows setting and registering health goals (eg, steps, weight, exercise, or diet), monitoring progress, dialog with the health coach by receiving advice and sending messages, and participation in an online forum. Health coaches access the platform through an internet browser and are able to proactively advise patients on a weekly-to-monthly basis based on their patients’ input in the platform. Personal data and health information are collected from the patients during the 3 to 12-month intervention program to provide the treatment service and for research purposes.</p>
        </boxed-text>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Ethics and Approvals</title>
        <p>Only pseudonymized data for which patients had granted their consent to make them available for research purposes were used in this study. Consent was obtained explicitly in the sign-up flow before the patient’s use of the service. Liva Healthcare processes the data as the data processor, using the means and purpose defined by the data controllers, that is, Danish municipalities.</p>
      </sec>
      <sec>
        <title>Data Collection and Selection</title>
        <p>The study was retrospective, applying data collected by Liva Healthcare from June 7, 2016, to March 21, 2018. For active users, the anchor time point for features was the date of data collection and for dropouts, we went back 4 weeks before the date of dropout (see next section). Data were extracted from a Microsoft SQL database and further processed in Alteryx. The dataset contains several unvalidated PROMs and sociodemographic information entered by the health coaches. Consequently, values for weight loss and body mass index (BMI) were filtered to remove extreme outliers and unrealistic values (weight differences of &gt;3.5 kg/week on average for weight registrations over 30 days or more and BMI &gt;100 kg/m<sup>2</sup>). The dataset was cleansed to only include patients who were referred to the platform by their doctor or municipality and showed commitment to the intervention by being properly set up in an advisory, received 3 or more advices from their coach, and had been active in the platform for at least 14 days (N=2684). A baseline of 14 days was selected as patients receive weekly advice in the beginning and should therefore receive their third advice on the fourteenth day of the program. Patients with <italic>less</italic> than 3 advices or 14 days of participation in the program were removed from the dataset as they had either not yet shown commitment to the intervention, signed up by mistake, or merely signed up within the last 14 days of data collection. Thus, it is not known if these patients dropped out or never meant to use the service.</p>
      </sec>
      <sec>
        <title>Definition of Dropout</title>
        <p>Generally, a large variation in the reporting and measurement of adherence is seen in previous literature [<xref ref-type="bibr" rid="ref18">18</xref>]. For the objective of this study, it was relevant to look at dropouts as patients who commit to the intervention and thenceforward discontinue using the platform, consequently dropping out. We proposed a definition of dropout that aligns with Eysenbach’s characterization from 2005 [<xref ref-type="bibr" rid="ref8">8</xref>] and other operationalizations [<xref ref-type="bibr" rid="ref11">11</xref>]. Dropout is hence defined as <italic>4 consecutive weeks of not performing any actions, for example, registrations or messages, in the platform</italic>. The threshold for a dropout after 4 weeks of inactivity was defined based on the insight that less than 5.0% (117/2684) of the patients become re-engaged in the program after 4 weeks of inactivity. For 79.2% (2126/2684) of the patients, 2 weeks of inactivity equals dropout, and 84.61% (2271/2684) of them drop out after 3 weeks of inactivity. Furthermore, dropouts are limited to the active coaching period, which is a maximum of 12 months. This might be lower for some providers, for example, 3 or 6 months. Patients who are still active after 12 months will move to a <italic>retention</italic> phase, and they will thus not be considered as dropouts if they discontinue the intervention.</p>
      </sec>
      <sec>
        <title>Analysis of Dataset</title>
        <p>To gain insight into the population and understand the factors associated with nonusage attrition, we performed several analyses of the users’ activity patterns by illustrating activity over time in the program for several subgroups of the population. We defined a formula for the current activity level in percentage based on these insights. Some descriptive user statistics of the population and analysis of predictors in <italic>t</italic> test and simple logistic regression models are provided to gain additional insights.</p>
      </sec>
      <sec>
        <title>Data Mining and Model Evaluation</title>
        <p>The Cross-Industry Standard Process for Data Mining framework [<xref ref-type="bibr" rid="ref19">19</xref>] was applied as an iterative data mining approach. This allowed for several iterations of the models to be developed as the knowledge of the population increased based on a better understanding of the dataset and end users (health coaches) who were able to provide feedback during each iteration.</p>
        <p>Inspired by studies performed in an educational setting, logistic regression, decision trees, and random forest methods were applied to classify participants in the intervention into either dropouts or nondropouts based on specific characteristics.</p>
        <p>We tested 11 variables that were well represented in the dataset as potential predictors of dropout: gender, age group, provider of the intervention, period of intervention, BMI at the beginning of the intervention, weight loss, number of advices received, number of messages sent, total number of weeks with inactivity, and inactivity in the last 1 or 2 consecutive weeks. For weight loss, we required registrations over at least 30 days to be included. Less than 20% of the patients who registered had provided their educational status and zip code; because of the low quantity, these variables were not applied.</p>
        <p>The variables in the final models for logistic regression were selected based on mixed backward and forward selection using the Akaike information criterion [<xref ref-type="bibr" rid="ref20">20</xref>]. For decision trees and random forest, variables were selected inherently by the hyperparameters. The minimum number of records allowed for a split and a terminal node was set to 50 and 25, respectively. The maximum allowed depth in the final tree was set to 10 to avoid overfitting. The trees were pruned with a complexity parameter set to 0.01 to reduce the number of branches and the relative error.</p>
        <p>To assess the quality of the 3 different models and to compare the predictive performance, the dataset was split into an 80% training and cross-validation set and a 20% test set. Owing to the relatively small size of the dataset, the training and cross-validation were performed using stratified 10-fold cross-validation. Stratification was applied on the target variable to ensure each fold was a good representative of the overall dataset distribution to reduce the bias and variance of the models. The best performing method was then applied to the 20% holdout test set that had never been seen by the model. The quality of the models was assessed based on the area under the curve (AUC) on the receiver operating characteristic (ROC) curve, the precision, and the accuracy. The goal was to have a high precision as the false negatives were the most critical to reduce in this scenario, that is, patients at high risk for dropout not identified as a potential dropout.</p>
      </sec>
      <sec>
        <title>Adoption and Implementation</title>
        <p>The findings from this study have been diffused among the health coaches using the Liva Healthcare platform and prototype models have been implemented into the platform. Interviews were conducted with health coaches to adjust the models in terms of when and how warnings should be presented. Feedback was continuously collected, and data were analyzed to assess dropout rate following implementation.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>User Statistics</title>
        <p>The final dataset contained 2684 patients registered in the LIVA database. The population was characterized by a greater proportion of females (1943/2684, 72.39%) compared with males (741/2684, 27.6%). The majority of the population was in the age range of 40 to 59 years, and the average lifetime on the platform was 108 days. Overweight patients represent the largest treatment group, but patients might enter the program with one or more of 7 other diseases and possible comorbidities (other secondary disease). Additional characteristics of the population are provided in <xref ref-type="table" rid="table1">Table 1</xref> (<italic>advice received</italic> and <italic>messages sent</italic> refer to the dialog between patient and health coach).</p>
        <p>The intervention status for the patients was that 53.99% (1449/2684) had dropped out, 39.43% (1060/2684) were currently in active advisory, 3.7% (100/2684) had completed the intervention (finished intervention after &gt;12 months), and 3% (75/2684) were in the retention phase (&gt;12 months in program). More than 1 in 4 dropouts had occurred in the first month of the program (between day 14 and 31, n=388, 26.8% of dropouts; <xref ref-type="table" rid="table2">Table 2</xref>).</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>A summary of the population from the final dataset included in the models.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="350"/>
            <col width="650"/>
            <thead>
              <tr valign="top">
                <td>Parameter (statistic)</td>
                <td>Description</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Sample size (N)</td>
                <td>2684 patients</td>
              </tr>
              <tr valign="top">
                <td>Number of providers (N)</td>
                <td>18 different providers with between 13 and 581 patients ever in program</td>
              </tr>
              <tr valign="top">
                <td>Gender (percentage distribution)</td>
                <td>72.4% females and 27.6% males</td>
              </tr>
              <tr valign="top">
                <td>Age (years), mean (SD)</td>
                <td>48.6 (13.2)</td>
              </tr>
              <tr valign="top">
                <td>Treatment groups (percentage distribution)</td>
                <td>Overweight (85%), diabetes (17%), heart diseases (12%), chronic obstructive pulmonary disease (5%), stress (15%), cancer (1%), alcoholism (1%), smoking (6%), or another secondary disease (20%)</td>
              </tr>
              <tr valign="top">
                <td>Days on platform (minimum, median, maximum)</td>
                <td>14, 82, 595</td>
              </tr>
              <tr valign="top">
                <td>Start body mass index (kg/m<sup>2</sup>), mean (SD)</td>
                <td>33.6 (6.0)</td>
              </tr>
              <tr valign="top">
                <td>Advice received (minimum, median, maximum)</td>
                <td>3, 7, 99</td>
              </tr>
              <tr valign="top">
                <td>Messages sent (minimum, median, maximum)</td>
                <td>0, 3, 156</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Number of dropouts over the period of intervention.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="500"/>
            <col width="500"/>
            <thead>
              <tr valign="top">
                <td>Months of intervention</td>
                <td>Number of dropouts</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>1</td>
                <td>388</td>
              </tr>
              <tr valign="top">
                <td>2-4</td>
                <td>633</td>
              </tr>
              <tr valign="top">
                <td>5-8</td>
                <td>300</td>
              </tr>
              <tr valign="top">
                <td>9-12</td>
                <td>128</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
      <sec>
        <title>Preliminary Analysis of Predictors</title>
        <p>In total, a larger proportion of females (1069/1943, 55.02%) had dropped out compared with males (380/741, 51.3%), and for age groups, the highest dropout rate was found among the oldest (above 75 years; <xref ref-type="table" rid="table3">Table 3</xref>).</p>
        <p>Characteristics for the patients in active advisory and the dropouts were assessed for the predictor variables of interest. Gender was found to be significant in a Welch 2-sample <italic>t</italic> test (<italic>P</italic>=.01). Dropouts had a slightly lower starting BMI than the active patients (<italic>P</italic>=.01). No major differences were seen in average age among the 2 groups (<italic>P</italic>=.60) nor average weight loss (<italic>P</italic>=.88). Large variations in the risk for dropout were found among the different providers of the intervention, varying from 7.3% to 87.0% in a simple logistic regression model.</p>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Percentage of dropouts by age group and gender. The percentage indicates the proportion of dropouts for the patients in the specific age group and gender.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="250"/>
            <col width="250"/>
            <col width="250"/>
            <col width="250"/>
            <thead>
              <tr valign="bottom">
                <td>Age group (years)</td>
                <td>Female, n (%)</td>
                <td>Male, n (%)</td>
                <td>Total, n (%)<sup>a</sup></td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>18-39</td>
                <td>600 (53.72)</td>
                <td>161 (51.60)</td>
                <td>761 (53.18)</td>
              </tr>
              <tr valign="top">
                <td>40-59</td>
                <td>1040 (55.48)</td>
                <td>395 (50.13)</td>
                <td>1435 (54.01)</td>
              </tr>
              <tr valign="top">
                <td>60-74</td>
                <td>284 (52.11)</td>
                <td>161 (50.31)</td>
                <td>445 (51.39)</td>
              </tr>
              <tr valign="top">
                <td>&gt;75</td>
                <td>19 (63.33)</td>
                <td>24 (54.55)</td>
                <td>43 (58.11)</td>
              </tr>
              <tr valign="top">
                <td>Total</td>
                <td>1943 (55.02)</td>
                <td>741 (51.32)</td>
                <td>2684 (53.99)</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table3fn1">
              <p><sup>a</sup>Percentage of the total population of participants.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Activity Analysis</title>
        <p>The largest proportion of active patients was in months 2 to 4 in the program and the lowest proportion was found at the beginning of the program in month 1 (<xref ref-type="fig" rid="figure1">Figure 1</xref>). The odds for a patient dropping out in the first month of the intervention were 4.35 times higher than for dropping out past month 8.</p>
        <p>We analyzed if trends in the patients’ activity patterns could identify attrition by analyzing patient engagement in the platform over time. Evidently, patients who drop out have a very low level of activity (defined as a registration, forum posting, or messaging the coach) in the last weeks of their time on the platform. Overall, 71.77% (1040/1449) of the dropouts decreased their activity level by more than 50% in their last 2 weeks. However, there is also an expected decrease in activity that will occur over time, and individuals will have different trend lines for patterns in activity (<xref ref-type="fig" rid="figure2">Figure 2</xref>). The first week in the program (week 0) was found to have a significantly higher number of registrations than the remaining weeks, on average 23.8 registrations, and was removed from the analysis to prevent skewing the linear regression line.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Proportion of active patients over 4 segments of the intervention period.</p>
          </caption>
          <graphic xlink:href="jmir_v21i9e13617_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Average number of activities in the platform per week in the program for patients who either completed the intervention or entered retention (n=175), excluding week 0 in the program.</p>
          </caption>
          <graphic xlink:href="jmir_v21i9e13617_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>A variable for the patient’s current activity level defined as a percentage of the baseline activity level was proposed (Equation 1). The variable accounts for (1) the patient’s average activity in the last 2 weeks, (2) the patient’s baseline activity (defined as the average activity in weeks 1-4 of the program), and (3) the regression line for the average activity levels over time (<xref ref-type="fig" rid="figure2">Figure 2</xref>). The average decrease in activities was found to be 0.094 per week. Patients with zero activity in the last 2 weeks will by default have a current activity level of 0%.</p>
        <disp-formula>Current activity level (%) = Average activity last two weeks / (Baseline activity - (0.094 × weeks in program)) (1)</disp-formula>
        <p>Fewer women tended to be active at the same number of inactive weeks compared with men, up to 40 weeks of inactivity, at which point a very small percentage were still active in the program at the time of the data collection (<xref ref-type="fig" rid="figure3">Figure 3</xref>). For age groups, the oldest and the youngest age groups were the least active (<xref ref-type="fig" rid="figure4">Figure 4</xref>). The oldest group was also found to have the highest rate of dropouts (<xref ref-type="table" rid="table3">Table 3</xref>).</p>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>Proportion of active patients over the total number of inactive weeks in the program defined by gender.</p>
          </caption>
          <graphic xlink:href="jmir_v21i9e13617_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure4" position="float">
          <label>Figure 4</label>
          <caption>
            <p>Proportion of active patients over the total number of inactive weeks in the program defined by age.</p>
          </caption>
          <graphic xlink:href="jmir_v21i9e13617_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure5" position="float">
          <label>Figure 5</label>
          <caption>
            <p>Variable importance plot for the 11 selected variables. Period of intervention is separated into 4 dummy variables.</p>
          </caption>
          <graphic xlink:href="jmir_v21i9e13617_fig5.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>Finally, introducing a variable that accounts for inactivity in the last 2 weeks of the program in any of the proposed models resulted in it becoming incomparably more significant than any of the models’ other variables, except for provider of intervention (partnerName; see <xref ref-type="fig" rid="figure5">Figure 5</xref>). Hence, inactivity in the platform should be a critical warning for the health coach. A significant decrease in activity that deviates from the overall pattern might also be a critical sign for attrition and consequently an attention mark for the health coach.</p>
      </sec>
      <sec>
        <title>Model Selection for Dropout Prediction</title>
        <p>The random forest achieved an AUC of 0.92 on the ROC chart and a Gini score of 0.84 on the stratified cross-validated training data, making it the best-performing model (<xref ref-type="fig" rid="figure6">Figure 6</xref>) compared with decision trees and logistic regression (<xref ref-type="table" rid="table4">Table 4</xref>). When applied to the holdout test data, the AUC increases by 0.01, and the model is thus not suspected of overfitting.</p>
        <p>The precision of the random forest model was 0.89, with an overall accuracy of 0.86. This means that 89% of all dropouts were classified correctly as dropouts and 11% were mistakenly classified as active patients. This corresponds with 88.7% (253/285) of the dropouts in the holdout test data being classified correctly as dropouts.</p>
        <p>The outlined models for inactivity, activity level, and dropout risk have been implemented into the LIVA platform for the health coaches to be notified of attention markers related to patients that are at high risk of dropping out. The threshold values for activity levels have been set to highlight patients at medium (current activity level below 60% of expected) and high risk (below 40%), visualized with yellow and red warnings, respectively, for the health coach (<xref ref-type="fig" rid="figure7">Figure 7</xref>). The random forest model for dropout will show a yellow warning as the risk increases to above 60% and red if the risk is above 75%. The thresholds were selected based on an assessment of the patient distributions in collaboration with the health coaches.</p>
        <fig id="figure6" position="float">
          <label>Figure 6</label>
          <caption>
            <p>Receiver operating characteristic curve with area under the curve for the random forest model on the holdout test data.</p>
          </caption>
          <graphic xlink:href="jmir_v21i9e13617_fig6.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <table-wrap position="float" id="table4">
          <label>Table 4</label>
          <caption>
            <p>Area under the curve (AUC) and Gini index for the receiver operating characteristic on the 3 applied best performing models.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="500"/>
            <col width="250"/>
            <col width="250"/>
            <thead>
              <tr valign="top">
                <td>Model</td>
                <td>AUC</td>
                <td>Gini</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Logistic regression</td>
                <td>0.84</td>
                <td>0.68</td>
              </tr>
              <tr valign="top">
                <td>Decision trees</td>
                <td>0.82</td>
                <td>0.64</td>
              </tr>
              <tr valign="top">
                <td>Random forest</td>
                <td>0.92</td>
                <td>0.84</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <fig id="figure7" position="float">
          <label>Figure 7</label>
          <caption>
            <p>Histogram of current activity level (%; calculated using Equation 1) compared with forecasted activity based on the linear overall population trend line. Only patients with at least 6 weeks on the platform are included.</p>
          </caption>
          <graphic xlink:href="jmir_v21i9e13617_fig7.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>This study applies real-world data from chronic lifestyle disease patients enrolled in an eHealth lifestyle intervention in municipal settings in Denmark. The findings show promising results in terms of applying data mining methods for the prediction of dropouts in eHealth interventions with high precision. To summarize, the following 4 key takeaway points were made clear in this study:</p>
        <list list-type="order">
          <list-item>
            <p>Patients are at the highest risk of dropout at the beginning of the intervention. Most dropouts occurred in the first part of the intervention, and evidence from other studies supports the finding that when participants drop out, they do so early. For instance, 65% of dropouts from a diet and physical activity short message service text message program occurred within the first 2 weeks [<xref ref-type="bibr" rid="ref21">21</xref>].</p>
          </list-item>
          <list-item>
            <p>Attrition is not an abrupt process but something that happens over time. We found that patients reduce their activity in the platform significantly in the weeks leading up to their dropout. Therefore, being aware of abnormal decreases in activity should be a good indicator for health coaches to initiate re-engagement.</p>
          </list-item>
          <list-item>
            <p>Dropout is primarily related to the program provider, outline of the intervention, activity in the platform (engagement), and, to a lower degree, the demographic variables available in this study. Multiple other studies have found attrition to be related to demographic variables [<xref ref-type="bibr" rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref15">15</xref>], but these were not available in this dataset.</p>
          </list-item>
          <list-item>
            <p>Predicting activity level and risk of dropout can enable personalized advice and goal setting. Our findings strongly suggest that dropouts can be predicted, and personalized coaching can be supported by several parameters. However, there are some limitations to the study that will be discussed in the following section.</p>
          </list-item>
        </list>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>The definition of dropout was based on patients not using the platform for 4 consecutive weeks. However, this might also include patients who stop using the platform because of having achieved their desired goal or goals, for example, behavior change or weight loss, or because the advisor has terminated the patient for other reasons. Termination reasons have been implemented in the platform following this study. In addition, the length of the intervention program offered by the provider was varying and unknown. This influences the likelihood of patients being incorrectly labelled as dropouts and should therefore be taken into consideration for the definition of the matter.</p>
        <p>Dropouts in the first 14 days were excluded from this study because of large uncertainties in the reason for dropout. As this group of patients was very diverse and the amount of data was primarily limited to their sign-up registration, it provided a restricted dataset for analysis. This suggests a possibility for a future study to look into these very early dropouts.</p>
        <p>The reliability of the findings is limited by some of the applied data (weight loss and BMI) being PROMs and most of the variables being self-reported by the patients. The data are, to some degree, validated by the health coaches, and extreme outliers are automatically marked as unrealistic by the Liva Healthcare system, but they have not been clinically verified.</p>
        <p>The activity analysis was based on a simple linear regression despite the pattern being shaped as a higher-order polynomial. A simple approach was sufficient for the focus of this study; however, further investigations into activity patterns showed large variations in individual patterns. For future studies, time series analysis on the activity patterns of a larger population could potentially be the basis for interesting risk models that could analyze activity levels for distinct patient types.</p>
      </sec>
      <sec>
        <title>Adoption and Implementation</title>
        <p>The insights obtained from our work are not sufficient to maintain engaged patients on the platform. The knowledge must be put into action to have an effect on the attrition rates. The discoveries must be diffused among the health coaches using the eHealth platform but should also be integrated into the system to support them to the highest degree possible.</p>
        <p>The finding that dropout is not an abrupt process but something that happens over time underlines the importance of the health coaches being warned of dropout risk to actively attempt a prevention of attrition. From a practical computational perspective, a random forest is a computationally expensive model, and depending on the nature of the intervention, the frequency of registrations by the patients, and the technical setup, it might not be applicable. Nevertheless, if the model is not expected to change frequently, then calculations can be performed, for example, every night, and can be used as the daily baseline for dropout risk in the advisors’ overview. Otherwise, simpler models such as logistic regression might be preferred.</p>
        <p>Previous studies have shown adherence to be closely related to the level of engagement in the platform, that is, by participating in an online forum [<xref ref-type="bibr" rid="ref13">13</xref>]. Socioeconomic status, occupation, and educational status have been shown to be related to dropout [<xref ref-type="bibr" rid="ref15">15</xref>], but this type of data has not been available in sufficient quantities for this research. Diagnosis and condition should also be included in future models. Utilizing these data types would provide important information for the models and likely increase the accuracy and possibly make distinct patient profiles clear. Thus, this added data could be used to further enhance and individualize the models.</p>
        <p>The provider of the intervention was found to be the most significant predictor of dropout, together with inactivity on the platform. This indicates that efforts toward preventing attrition should be targeted at providers to the same degree as patients. Provider-specific attrition models may perform even better than the generic approach proposed in this work. Further insights into the providers and their strategies are required.</p>
        <p>Finally, to better validate the warnings that have been implemented into the platform, it requires a randomized setup or a less diverse population that is spread across multiple providers with varying programs. However, initial feedback from the health coaches is very positive, and the dropout rate for patients that have entered into the program after the date of data collection for this study is at only 19.3% (N=6402) compared with 54.0% for the population included in this study.</p>
      </sec>
      <sec>
        <title>Perspectives</title>
        <p>This study contributes to the literature on adherence and nonusage attrition in eHealth by analyzing activity patterns, assessing various methods and predictor variables for predicting dropout in a chronic patient lifestyle intervention, and proposing some perspectives for implementation. We expect future research and development in eHealth to apply data mining methods in the process of tailoring information to patients to a higher degree to achieve personalized interventions as the field of digital health continues to evolve. Ongoing research is currently assessing how lifestyle interventions can be tailored to the individual patient [<xref ref-type="bibr" rid="ref23">23</xref>], and as artificial intelligence is gaining ground within health care, we expect to see interventions, treatment, and guidance being selected based on what is most suited to the specific individual patient profile in the future.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>It is possible to apply methods from data mining in the context of predicting dropouts in an eHealth setting. Stratified cross-validation shows that patients at high risk of dropout can be predicted with 89% precision using a random forest model. Computationally simpler models, such as logistic regression, are applicable as well but might produce less precise predictions. The risk of dropout can be visualized as warnings for the health coaches, so they can attempt to re-engage the patient in their intervention before dropout. Initial assessment of the models implemented in an eHealth platform in use shows a decrease in dropout rate. Obtaining richer data on educational status and socioeconomic factors in combination with a better delineation of dropouts would increase the quality of the models.</p>
      </sec>
    </sec>
  </body>
  <back>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">AUC</term>
          <def>
            <p>area under the curve</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">BMI</term>
          <def>
            <p>body mass index</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">eHealth</term>
          <def>
            <p>electronic health</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">PROMs</term>
          <def>
            <p>patient-reported outcome measures</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">ROC</term>
          <def>
            <p>receiver operating characteristic</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>DHP analyzed the data, created and evaluated the models, and drafted the paper. MM, TS, and CS supervised the data analyses. MM, TS, and CS gave inputs on several drafts for the manuscript. All authors read, critically revised, and approved the final manuscript. Liva Healthcare provided the data and allocated resources to conduct and assist in the research and creation of this paper. The publishing of this paper was funded by the University of Southern Denmark, Health Informatics.</p>
    </ack>
    <fn-group>
      <fn fn-type="conflict">
        <p>DHP and CS are employed by Liva Healthcare A/S.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gemmill</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <source>CiteSeerX</source>
          <year>2008</year>
          <comment>Research Note: Chronic Disease Management in Europe 
          <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.619.4434&amp;rep=rep1&amp;type=pdf">http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.619.4434&amp;rep=rep1&amp;type=pdf</ext-link></comment>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fiordelli</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Diviani</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Schulz</surname>
              <given-names>PJ</given-names>
            </name>
          </person-group>
          <article-title>Mapping mhealth research: a decade of evolution</article-title>
          <source>J Med Internet Res</source>
          <year>2013</year>
          <month>05</month>
          <day>21</day>
          <volume>15</volume>
          <issue>5</issue>
          <fpage>e95</fpage>
          <pub-id pub-id-type="doi">10.2196/jmir.2430</pub-id>
          <pub-id pub-id-type="medline">23697600</pub-id>
          <pub-id pub-id-type="pii">v15i5e95</pub-id>
          <pub-id pub-id-type="pmcid">PMC3668610</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Brandt</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Brandt</surname>
              <given-names>CJ</given-names>
            </name>
            <name name-style="western">
              <surname>Glintborg</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Arendal</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Toubro</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Brandt</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Sustained weight loss during 20 months using a personalized interactive internet based dietician advice program in a general practice setting</article-title>
          <source>Int J Adv Life Sci</source>
          <year>2011</year>
          <volume>3</volume>
          <issue>1-2</issue>
          <fpage>23</fpage>
          <lpage>8</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.researchgate.net/publication/228480051_Sustained_Weight_Loss_during_20_Months_using_a_Personalized_Interactive_Internet_Based_Dietician_Advice_Program_in_a_General_Practice_Setting"/>
          </comment>
          <pub-id pub-id-type="pmcid">PMC4016832</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Afshin</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Babalola</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Mclean</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Ma</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>CY</given-names>
            </name>
            <name name-style="western">
              <surname>Arabi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Mozaffarian</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Information technology and lifestyle: a systematic evaluation of internet and mobile interventions for improving diet, physical activity, obesity, tobacco, and alcohol use</article-title>
          <source>J Am Heart Assoc</source>
          <year>2016</year>
          <month>08</month>
          <day>31</day>
          <volume>5</volume>
          <issue>9</issue>
          <fpage>e003058</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.ahajournals.org/doi/full/10.1161/JAHA.115.003058?url_ver=Z39.88-2003&amp;rfr_id=ori:rid:crossref.org&amp;rfr_dat=cr_pub%3dpubmed"/>
          </comment>
          <pub-id pub-id-type="doi">10.1161/JAHA.115.003058</pub-id>
          <pub-id pub-id-type="medline">27581172</pub-id>
          <pub-id pub-id-type="pii">JAHA.115.003058</pub-id>
          <pub-id pub-id-type="pmcid">PMC5079005</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Coiera</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Telehealth and mobile health</article-title>
          <source>Guide to Health Informatics. Third Edition</source>
          <year>2015</year>
          <publisher-loc>Florida, United States</publisher-loc>
          <publisher-name>CRC Press</publisher-name>
          <fpage>343</fpage>
          <lpage>69</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mistry</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Systematic review of studies of the cost-effectiveness of telemedicine and telecare. Changes in the economic evidence over twenty years</article-title>
          <source>J Telemed Telecare</source>
          <year>2012</year>
          <month>01</month>
          <volume>18</volume>
          <issue>1</issue>
          <fpage>1</fpage>
          <lpage>6</lpage>
          <pub-id pub-id-type="doi">10.1258/jtt.2011.110505</pub-id>
          <pub-id pub-id-type="medline">22101609</pub-id>
          <pub-id pub-id-type="pii">jtt.2011.110505</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Free</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Phillips</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Watson</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Galli</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Felix</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Edwards</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Patel</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Haines</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>The effectiveness of mobile-health technologies to improve health care service delivery processes: a systematic review and meta-analysis</article-title>
          <source>PLoS Med</source>
          <year>2013</year>
          <volume>10</volume>
          <issue>1</issue>
          <fpage>e1001363</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://dx.plos.org/10.1371/journal.pmed.1001363"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pmed.1001363</pub-id>
          <pub-id pub-id-type="medline">23458994</pub-id>
          <pub-id pub-id-type="pii">PMEDICINE-D-12-00641</pub-id>
          <pub-id pub-id-type="pmcid">PMC3566926</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Eysenbach</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>The law of attrition</article-title>
          <source>J Med Internet Res</source>
          <year>2005</year>
          <month>03</month>
          <day>31</day>
          <volume>7</volume>
          <issue>1</issue>
          <fpage>e11</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.jmir.org/2005/1/e11/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/jmir.7.1.e11</pub-id>
          <pub-id pub-id-type="medline">15829473</pub-id>
          <pub-id pub-id-type="pii">v7e11</pub-id>
          <pub-id pub-id-type="pmcid">PMC1550631</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lie</surname>
              <given-names>SS</given-names>
            </name>
            <name name-style="western">
              <surname>Karlsen</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Oord</surname>
              <given-names>ER</given-names>
            </name>
            <name name-style="western">
              <surname>Graue</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Oftedal</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Dropout from an ehealth intervention for adults with type 2 diabetes: a qualitative study</article-title>
          <source>J Med Internet Res</source>
          <year>2017</year>
          <month>05</month>
          <day>30</day>
          <volume>19</volume>
          <issue>5</issue>
          <fpage>e187</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.jmir.org/2017/5/e187/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/jmir.7479</pub-id>
          <pub-id pub-id-type="medline">28559223</pub-id>
          <pub-id pub-id-type="pii">v19i5e187</pub-id>
          <pub-id pub-id-type="pmcid">PMC5470008</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Melville</surname>
              <given-names>KM</given-names>
            </name>
            <name name-style="western">
              <surname>Casey</surname>
              <given-names>LM</given-names>
            </name>
            <name name-style="western">
              <surname>Kavanagh</surname>
              <given-names>DJ</given-names>
            </name>
          </person-group>
          <article-title>Dropout from internet-based treatment for psychological disorders</article-title>
          <source>Br J Clin Psychol</source>
          <year>2010</year>
          <month>11</month>
          <volume>49</volume>
          <issue>Pt 4</issue>
          <fpage>455</fpage>
          <lpage>71</lpage>
          <pub-id pub-id-type="doi">10.1348/014466509X472138</pub-id>
          <pub-id pub-id-type="medline">19799804</pub-id>
          <pub-id pub-id-type="pii">bjcp840</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Christensen</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Griffiths</surname>
              <given-names>KM</given-names>
            </name>
            <name name-style="western">
              <surname>Farrer</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Adherence in internet interventions for anxiety and depression</article-title>
          <source>J Med Internet Res</source>
          <year>2009</year>
          <month>04</month>
          <day>24</day>
          <volume>11</volume>
          <issue>2</issue>
          <fpage>e13</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.jmir.org/2009/2/e13/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/jmir.1194</pub-id>
          <pub-id pub-id-type="medline">19403466</pub-id>
          <pub-id pub-id-type="pii">v11i2e13</pub-id>
          <pub-id pub-id-type="pmcid">PMC2762797</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Moroshko</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Brennan</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>O'Brien</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Predictors of dropout in weight loss interventions: a systematic review of the literature</article-title>
          <source>Obes Rev</source>
          <year>2011</year>
          <month>11</month>
          <volume>12</volume>
          <issue>11</issue>
          <fpage>912</fpage>
          <lpage>34</lpage>
          <pub-id pub-id-type="doi">10.1111/j.1467-789X.2011.00915.x</pub-id>
          <pub-id pub-id-type="medline">21815990</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Richardson</surname>
              <given-names>CR</given-names>
            </name>
            <name name-style="western">
              <surname>Buis</surname>
              <given-names>LR</given-names>
            </name>
            <name name-style="western">
              <surname>Janney</surname>
              <given-names>AW</given-names>
            </name>
            <name name-style="western">
              <surname>Goodrich</surname>
              <given-names>DE</given-names>
            </name>
            <name name-style="western">
              <surname>Sen</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Hess</surname>
              <given-names>ML</given-names>
            </name>
            <name name-style="western">
              <surname>Mehari</surname>
              <given-names>KS</given-names>
            </name>
            <name name-style="western">
              <surname>Fortlage</surname>
              <given-names>LA</given-names>
            </name>
            <name name-style="western">
              <surname>Resnick</surname>
              <given-names>PJ</given-names>
            </name>
            <name name-style="western">
              <surname>Zikmund-Fisher</surname>
              <given-names>BJ</given-names>
            </name>
            <name name-style="western">
              <surname>Strecher</surname>
              <given-names>VJ</given-names>
            </name>
            <name name-style="western">
              <surname>Piette</surname>
              <given-names>JD</given-names>
            </name>
          </person-group>
          <article-title>An online community improves adherence in an internet-mediated walking program. Part 1: results of a randomized controlled trial</article-title>
          <source>J Med Internet Res</source>
          <year>2010</year>
          <month>12</month>
          <day>17</day>
          <volume>12</volume>
          <issue>4</issue>
          <fpage>e71</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.jmir.org/2010/4/e71/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/jmir.1338</pub-id>
          <pub-id pub-id-type="medline">21169160</pub-id>
          <pub-id pub-id-type="pii">v12i4e71</pub-id>
          <pub-id pub-id-type="pmcid">PMC3056526</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>von Brachel</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Hötzel</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Hirschfeld</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Rieger</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Schmidt</surname>
              <given-names>U</given-names>
            </name>
            <name name-style="western">
              <surname>Kosfelder</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Hechler</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Schulte</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Vocks</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Internet-based motivation program for women with eating disorders: eating disorder pathology and depressive mood predict dropout</article-title>
          <source>J Med Internet Res</source>
          <year>2014</year>
          <month>03</month>
          <day>31</day>
          <volume>16</volume>
          <issue>3</issue>
          <fpage>e92</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.jmir.org/2014/3/e92/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/jmir.3104</pub-id>
          <pub-id pub-id-type="medline">24686856</pub-id>
          <pub-id pub-id-type="pii">v16i3e92</pub-id>
          <pub-id pub-id-type="pmcid">PMC4004149</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kannisto</surname>
              <given-names>KA</given-names>
            </name>
            <name name-style="western">
              <surname>Korhonen</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Adams</surname>
              <given-names>CE</given-names>
            </name>
            <name name-style="western">
              <surname>Koivunen</surname>
              <given-names>MH</given-names>
            </name>
            <name name-style="western">
              <surname>Vahlberg</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Välimäki</surname>
              <given-names>MA</given-names>
            </name>
          </person-group>
          <article-title>Factors associated with dropout during recruitment and follow-up periods of a mHealth-based randomized controlled trial for Mobile.Net to encourage treatment adherence for people with serious mental health problems</article-title>
          <source>J Med Internet Res</source>
          <year>2017</year>
          <month>02</month>
          <day>21</day>
          <volume>19</volume>
          <issue>2</issue>
          <fpage>e46</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.jmir.org/2017/2/e46/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/jmir.6417</pub-id>
          <pub-id pub-id-type="medline">28223262</pub-id>
          <pub-id pub-id-type="pii">v19i2e46</pub-id>
          <pub-id pub-id-type="pmcid">PMC5340923</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ameri</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Fard</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Chinnam</surname>
              <given-names>RB</given-names>
            </name>
            <name name-style="western">
              <surname>Reddy</surname>
              <given-names>CK</given-names>
            </name>
          </person-group>
          <article-title>Survival Analysis Based Framework for Early Prediction of Student Dropouts</article-title>
          <source>Proceedings of the 25th ACM International on Conference on Information and Knowledge Management</source>
          <year>2016</year>
          <conf-name>CIKM'16</conf-name>
          <conf-date>October 24-28, 2016</conf-date>
          <conf-loc>Indianapolis, Indiana, USA</conf-loc>
          <fpage>903</fpage>
          <lpage>12</lpage>
          <pub-id pub-id-type="doi">10.1145/2983323.2983351</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rovira</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Puertas</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Igual</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Data-driven system to predict academic grades and dropout</article-title>
          <source>PLoS One</source>
          <year>2017</year>
          <volume>12</volume>
          <issue>2</issue>
          <fpage>e0171207</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://dx.plos.org/10.1371/journal.pone.0171207"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0171207</pub-id>
          <pub-id pub-id-type="medline">28196078</pub-id>
          <pub-id pub-id-type="pii">PONE-D-16-42051</pub-id>
          <pub-id pub-id-type="pmcid">PMC5308611</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Donkin</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Christensen</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Naismith</surname>
              <given-names>SL</given-names>
            </name>
            <name name-style="western">
              <surname>Neal</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Hickie</surname>
              <given-names>IB</given-names>
            </name>
            <name name-style="western">
              <surname>Glozier</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>A systematic review of the impact of adherence on the effectiveness of e-therapies</article-title>
          <source>J Med Internet Res</source>
          <year>2011</year>
          <month>08</month>
          <day>5</day>
          <volume>13</volume>
          <issue>3</issue>
          <fpage>e52</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.jmir.org/2011/3/e52/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/jmir.1772</pub-id>
          <pub-id pub-id-type="medline">21821503</pub-id>
          <pub-id pub-id-type="pii">v13i3e52</pub-id>
          <pub-id pub-id-type="pmcid">PMC3222162</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Shearer</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>The CRISP-DM model: the new blueprint for data mining</article-title>
          <source>J Data Warehous</source>
          <year>2000</year>
          <volume>5</volume>
          <issue>4</issue>
          <fpage>13</fpage>
          <lpage>22</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.scirp.org/reference/ReferencesPapers.aspx?ReferenceID=1592780"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Akaike</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>A new look at the statistical model identification</article-title>
          <source>IEEE T Automat Contr</source>
          <year>1974</year>
          <month>12</month>
          <volume>19</volume>
          <issue>6</issue>
          <fpage>716</fpage>
          <lpage>23</lpage>
          <pub-id pub-id-type="doi">10.1109/TAC.1974.1100705</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Coa</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Patrick</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Baseline motivation type as a predictor of dropout in a healthy eating text messaging program</article-title>
          <source>JMIR Mhealth Uhealth</source>
          <year>2016</year>
          <month>09</month>
          <day>29</day>
          <volume>4</volume>
          <issue>3</issue>
          <fpage>e114</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://mhealth.jmir.org/2016/3/e114/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/mhealth.5992</pub-id>
          <pub-id pub-id-type="medline">27688034</pub-id>
          <pub-id pub-id-type="pii">v4i3e114</pub-id>
          <pub-id pub-id-type="pmcid">PMC5064093</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Grutzmacher</surname>
              <given-names>SK</given-names>
            </name>
            <name name-style="western">
              <surname>Munger</surname>
              <given-names>AL</given-names>
            </name>
            <name name-style="western">
              <surname>Speirs</surname>
              <given-names>KE</given-names>
            </name>
            <name name-style="western">
              <surname>Vafai</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Hilberg</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Duru</surname>
              <given-names>EB</given-names>
            </name>
            <name name-style="western">
              <surname>Worthington</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Lachenmayr</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Predicting attrition in a text-based nutrition education program: survival analysis of Text2BHealthy</article-title>
          <source>JMIR Mhealth Uhealth</source>
          <year>2019</year>
          <month>01</month>
          <day>21</day>
          <volume>7</volume>
          <issue>1</issue>
          <fpage>e9967</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://mhealth.jmir.org/2019/1/e9967/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/mhealth.9967</pub-id>
          <pub-id pub-id-type="medline">30664489</pub-id>
          <pub-id pub-id-type="pii">v7i1e9967</pub-id>
          <pub-id pub-id-type="pmcid">PMC6360389</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Elissen</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Hertroijs</surname>
              <given-names>FD</given-names>
            </name>
            <name name-style="western">
              <surname>Schaper</surname>
              <given-names>CN</given-names>
            </name>
            <name name-style="western">
              <surname>Vrijhoef</surname>
              <given-names>JH</given-names>
            </name>
            <name name-style="western">
              <surname>Ruwaard</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Profiling patients' healthcare needs to support integrated, person-centered models for long-term disease management (PROFILe): research design</article-title>
          <source>Int J Integr Care</source>
          <year>2016</year>
          <month>04</month>
          <day>29</day>
          <volume>16</volume>
          <issue>2</issue>
          <fpage>1</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/27616957"/>
          </comment>
          <pub-id pub-id-type="doi">10.5334/ijic.2208</pub-id>
          <pub-id pub-id-type="medline">27616957</pub-id>
          <pub-id pub-id-type="pmcid">PMC5015555</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
