<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMIR</journal-id>
      <journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id>
      <journal-title>Journal of Medical Internet Research</journal-title>
      <issn pub-type="epub">1438-8871</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v25i1e43664</article-id>
      <article-id pub-id-type="pmid">37079370</article-id>
      <article-id pub-id-type="doi">10.2196/43664</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Exploring the Relationship Between Privacy and Utility in Mobile Health: Algorithm Development and Validation via Simulations of Federated Learning, Differential Privacy, and External Attacks</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Mavragani</surname>
            <given-names>Amaryllis</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Jeem</surname>
            <given-names>Yaltafit</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Bonomi</surname>
            <given-names>Luca</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Shen</surname>
            <given-names>Alexander</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff2" ref-type="aff">2</xref>
          <address>
            <institution>Department of Statistics and Data Science</institution>
            <institution>Carnegie Mellon University</institution>
            <addr-line>5000 Forbes Ave</addr-line>
            <addr-line>Pittsburgh, PA, 15213</addr-line>
            <country>United States</country>
            <phone>1 7022754242</phone>
            <email>alexshen@umich.edu</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-2607-3403</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Francisco</surname>
            <given-names>Luke</given-names>
          </name>
          <degrees>BA</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-7064-1952</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Sen</surname>
            <given-names>Srijan</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-4495-495X</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Tewari</surname>
            <given-names>Ambuj</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff5" ref-type="aff">5</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-6969-7844</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Department of Statistics</institution>
        <institution>University of Michigan</institution>
        <addr-line>Ann Arbor, MI</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Department of Statistics and Data Science</institution>
        <institution>Carnegie Mellon University</institution>
        <addr-line>Pittsburgh, PA</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Eisenberg Family Depression Center</institution>
        <institution>University of Michigan</institution>
        <addr-line>Ann Arbor, MI</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff4">
        <label>4</label>
        <institution>Molecular and Behavioral Neuroscience Institute</institution>
        <institution>University of Michigan</institution>
        <addr-line>Ann Arbor, MI</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff5">
        <label>5</label>
        <institution>Department of Electrical Engineering and Computer Science</institution>
        <institution>University of Michigan</institution>
        <addr-line>Ann Arbor, MI</addr-line>
        <country>United States</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Alexander Shen <email>alexshen@umich.edu</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <year>2023</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>20</day>
        <month>4</month>
        <year>2023</year>
      </pub-date>
      <volume>25</volume>
      <elocation-id>e43664</elocation-id>
      <history>
        <date date-type="received">
          <day>19</day>
          <month>10</month>
          <year>2022</year>
        </date>
        <date date-type="rev-request">
          <day>11</day>
          <month>1</month>
          <year>2023</year>
        </date>
        <date date-type="rev-recd">
          <day>31</day>
          <month>1</month>
          <year>2023</year>
        </date>
        <date date-type="accepted">
          <day>19</day>
          <month>2</month>
          <year>2023</year>
        </date>
      </history>
      <copyright-statement>©Alexander Shen, Luke Francisco, Srijan Sen, Ambuj Tewari. Originally published in the Journal of Medical Internet Research (https://www.jmir.org), 20.04.2023.</copyright-statement>
      <copyright-year>2023</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research, is properly cited. The complete bibliographic information, a link to the original publication on https://www.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://www.jmir.org/2023/1/e43664" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Although evidence supporting the feasibility of large-scale mobile health (mHealth) systems continues to grow, privacy protection remains an important implementation challenge. The potential scale of publicly available mHealth applications and the sensitive nature of the data involved will inevitably attract unwanted attention from adversarial actors seeking to compromise user privacy. Although privacy-preserving technologies such as federated learning (FL) and differential privacy (DP) offer strong theoretical guarantees, it is not clear how such technologies actually perform under real-world conditions.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>Using data from the University of Michigan Intern Health Study (IHS), we assessed the privacy protection capabilities of FL and DP against the trade-offs in the associated model’s accuracy and training time. Using a simulated external attack on a target mHealth system, we aimed to measure the effectiveness of such an attack under various levels of privacy protection on the target system and measure the costs to the target system’s performance associated with the chosen levels of privacy protection.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>A neural network classifier that attempts to predict IHS participant daily mood ecological momentary assessment score from sensor data served as our target system. An external attacker attempted to identify participants whose average mood ecological momentary assessment score is lower than the global average. The attack followed techniques in the literature, given the relevant assumptions about the abilities of the attacker. For measuring attack effectiveness, we collected attack success metrics (area under the curve [AUC], positive predictive value, and sensitivity), and for measuring privacy costs, we calculated the target model training time and measured the model utility metrics. Both sets of metrics are reported under varying degrees of privacy protection on the target.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>We found that FL alone does not provide adequate protection against the privacy attack proposed above, where the attacker’s AUC in determining which participants exhibit lower than average mood is over 0.90 in the worst-case scenario. However, under the highest level of DP tested in this study, the attacker’s AUC fell to approximately 0.59 with only a 10 percentage point decrease in the target’s <italic>R</italic><sup>2</sup> and a 43% increase in model training time. Attack positive predictive value and sensitivity followed similar trends. Finally, we showed that participants in the IHS most likely to require strong privacy protection are also most at risk from this particular privacy attack and subsequently stand to benefit the most from these privacy-preserving technologies.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>Our results demonstrated both the necessity of proactive privacy protection research and the feasibility of the current FL and DP methods implemented in a real mHealth scenario. Our simulation methods characterized the privacy-utility trade-off in our mHealth setup using highly interpretable metrics, providing a framework for future research into privacy-preserving technologies in data-driven health and medical applications.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>privacy</kwd>
        <kwd>data protection</kwd>
        <kwd>machine learning</kwd>
        <kwd>federated learning</kwd>
        <kwd>neural networks</kwd>
        <kwd>mobile health</kwd>
        <kwd>mHealth</kwd>
        <kwd>wearable electronic devices</kwd>
        <kwd>differential privacy</kwd>
        <kwd>privacy</kwd>
        <kwd>learning</kwd>
        <kwd>evidence</kwd>
        <kwd>feasibility</kwd>
        <kwd>applications</kwd>
        <kwd>training</kwd>
        <kwd>technology</kwd>
        <kwd>mobile phone</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <sec>
        <title>Background</title>
        <p>The rise of mobile health (mHealth) as an exciting and compelling health care paradigm has been unmistakable. As wearable devices continue to gain popularity and smartphone penetration continues to rise globally [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref2">2</xref>], opportunities for mHealth to make positive impacts on health care delivery and administration have proliferated. On the research side, the period between the years 2018 and 2020 has seen more mHealth-related publications than the previous years combined [<xref ref-type="bibr" rid="ref3">3</xref>]. Meanwhile, the current global mHealth market is expected to experience annual growth of 11% for the better part of this decade [<xref ref-type="bibr" rid="ref2">2</xref>].</p>
        <p>However, widespread adoption of mHealth technologies may not occur until the challenge of user data privacy is overcome [<xref ref-type="bibr" rid="ref4">4</xref>]. Recent polling has shown that the vast majority of Americans do not believe that the benefits of releasing personal data to private companies are worth the risks [<xref ref-type="bibr" rid="ref5">5</xref>]. Research has also shown that privacy concerns directly affect the willingness of users to participate in mHealth systems, especially younger users [<xref ref-type="bibr" rid="ref6">6</xref>] and users with diseases that carry social stigma [<xref ref-type="bibr" rid="ref7">7</xref>]. Along with the increased frequency of cyberattacks on centralized data centers and IT infrastructure and the growing number of smart devices that use these facilities [<xref ref-type="bibr" rid="ref8">8</xref>], the costs of insufficient security and privacy protection for future large-scale mHealth applications are abundantly clear.</p>
        <p>The collection of privacy-enhancing tools spans the entirety of the data collection and use pipeline, including approaches such as data policies, encryption, access control, and secure multiparty computation [<xref ref-type="bibr" rid="ref9">9</xref>]. In this study, we examined 2 popular algorithmic strategies (federated learning [FL] and differential privacy [DP]) and explored some of their shortcomings. Specifically, we demonstrated the trade-off between algorithmic utility and privacy protection when implementing these strategies in practical mHealth settings. We aimed to offer insights into the potential of these strategies to protect user data by constructing a simulated attack on a centralized server.</p>
      </sec>
      <sec>
        <title>FL Strategy</title>
        <p>FL is a machine learning method used when data are distributed across independent devices (often referred to as clients). In the traditional centralized learning regime, a data collector interested in constructing a statistical model for answering queries about the data would first aggregate all the data onto a central server before performing model optimization on all data simultaneously. However, in the FL setting, all data remain with the individual client, and only the statistical model resides on the central server. Optimization of the FL model occurs in a distributed fashion, where copies of the model are sent to clients and model <italic>updates</italic> are aggregated from clients to the central server at each optimization step. Here, individual updates are calculated using only the data on the client device.</p>
        <p>The exact implementation details of FL depend on the statistical model being constructed, but its main benefit over the centralized regime is that a security breach of the central server is far less serious. In the worst-case scenario, the attacker can only access historical communications between the central server and clients, none of which will contain any private user data. However, FL can reduce the utility of the central model [<xref ref-type="bibr" rid="ref10">10</xref>] and slow optimization depending on the availability of individual clients. Nevertheless, initial research on FL in an mHealth context has shown that models trained in a federated manner have only minor performance costs compared with models trained centrally [<xref ref-type="bibr" rid="ref11">11</xref>]. In this regard, FL would seem like a prime candidate for privacy protection in large-scale mHealth applications; however, we show that FL alone is not sufficient to defend against all privacy threats.</p>
      </sec>
      <sec>
        <title>DP Strategy</title>
        <p>DP aims to overcome the problems associated with information leakage that occur whenever a query is conducted on a private data set. In short, if the answer to a query (eg, What is the average age of patients in XYZ hospital?) given a fixed data set is deterministic, a clever attacker could combine a series of queries with auxiliary information to infer private information about individual records in the data set. This includes whether a particular record is present in the data set (membership inference) or features associated with a particular record (data reconstruction).</p>
        <p>DP protects against such attacks by adding a stochastic component to query outputs. For example, when the output is a continuous value, it is often accomplished by adding random noise to the answer. As there is no longer a deterministic mapping between data set properties and query outputs, the probability of an attacker successfully inferring private information is reduced. In conventional ε-δ DP, we say that a mechanism <italic>M</italic> that generates an output based on an input data set <italic>D</italic> is ε-δ differentially private if the following inequality holds:</p>
        <disp-formula><italic>Pr[M(D<sub>1</sub>) ∈ A] ≤ e<sup>ε</sup> Pr[M(D<sub>2</sub>) ∈ A] + δ</italic> <bold>(1)</bold></disp-formula>
        <p>where <italic>A</italic> is any set of possible output values and <italic>D<sub>1</sub></italic> and <italic>D<sub>2</sub></italic> differ only in the presence or absence of a single record [<xref ref-type="bibr" rid="ref12">12</xref>]. In other words, a differentially private construction provides strong bounds on the probability of a successful membership inference attack by enforcing that the model output is sufficiently close (as defined by ε and δ) in distribution to what the model would have output if one user’s record were deleted from the data set.</p>
        <p>As with FL, the exact implementation of DP depends on the nature of the queries made on a private data set. DP is commonly used to control information leakage that results from exposing the parameters of the models trained on private data. For example, Abadi et al [<xref ref-type="bibr" rid="ref13">13</xref>] demonstrated how DP can be integrated into gradient descent algorithms for deep learning to achieve certain statistical guarantees of privacy. As with FL, the benefits of DP come at the cost of lower model utility, often embodied in lower model accuracy and longer optimization times.</p>
      </sec>
      <sec>
        <title>Previous Work</title>
        <p>FL has widespread application in health care settings. Using electronic health records distributed across hospital systems, researchers have successfully developed federated models to predict heart-related hospitalizations [<xref ref-type="bibr" rid="ref14">14</xref>], electrocardiogram classification [<xref ref-type="bibr" rid="ref15">15</xref>], and clinical outcomes in patients with COVID-19 [<xref ref-type="bibr" rid="ref16">16</xref>]. In addition to the results in a study by Liu et al [<xref ref-type="bibr" rid="ref11">11</xref>], other studies have shown that FL can be applied to wearable sensor data in biomedical applications [<xref ref-type="bibr" rid="ref17">17</xref>], a particular area of interest for mHealth research. Such studies generally find that the benefits of FL outweigh the costs.</p>
        <p>Similarly, there have been many uses of DP in health care applications [<xref ref-type="bibr" rid="ref18">18</xref>], including drug sensitivity prediction [<xref ref-type="bibr" rid="ref19">19</xref>] and coronary heart disease diagnosis [<xref ref-type="bibr" rid="ref20">20</xref>]. Other studies combined DP with FL to achieve enhanced privacy protection [<xref ref-type="bibr" rid="ref21">21</xref>]. However, such studies have not demonstrated a universal rule for choosing the ε-DP parameter in any given application. Hsu et al [<xref ref-type="bibr" rid="ref22">22</xref>] found that the optimal choices of ε in the existing literature span orders of magnitude depending on the context. Therefore, this study aimed to contribute to the DP literature by evaluating its efficacy in a novel health sensor data application.</p>
        <p>Although quantifying model utility is straightforward in most cases, quantifying privacy protection is usually imprecise. The benefits of FL are clear at the conceptual level (eliminating risks associated with centralizing data in one location), but how this translates to a quantifiable increase in privacy protection is unclear. Although DP provides strong statistical guarantees tied to numeric parameters (such as in ε-δ DP), such guarantees only directly apply to a specific class of privacy attacks (membership inference attacks).</p>
        <p>Attempts have been made to measure the effectiveness of FL and DP against other types of attacks, most notably property inference attacks that aim to uncover the private attributes of the training data. For instance, Naseri et al [<xref ref-type="bibr" rid="ref23">23</xref>] constructed simulated privacy attacks against an image classifier trained on the Labeled Faces in the Wild data set. Even with the protection of FL and local DP, they found that these classifiers are still vulnerable to external property inference [<xref ref-type="bibr" rid="ref23">23</xref>]. Melis et al [<xref ref-type="bibr" rid="ref24">24</xref>] conducted similar experiments on both the Labeled Faces in the Wild data set and the Yelp reviews data set and arrived at a similar conclusion [<xref ref-type="bibr" rid="ref24">24</xref>]. However, both papers limit the number of clients in their FL setup to no more than 30, which is far below the scale of mHealth applications intended for public use. Furthermore, it is unclear whether their findings for image and text classifiers extend to other domains and data types, such as time series sensor data collected from wearable health devices.</p>
        <p>In this study, we measured the privacy-utility trade-off of FL and DP using wearable sensor data gathered from a large-scale clinical study of medical interns. We adopted an existing simulation-based methodology tailored to a realistic mHealth setup to understand how FL and DP might perform in this specific domain. To the best of our knowledge, this is the first study to (1) evaluate the effectiveness of FL and DP on a real-world health sensor data set, (2) use simulation-based methodology on time series sensor data at the scale of thousands of FL clients, and (3) characterize which FL clients are most at risk from these external attacks if privacy protection is insufficient.</p>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Ethics Approval</title>
        <p>The data used in this analysis come from the University of Michigan Intern Health Study, approved by the Institutional Review Boards of the University of Michigan Medical School (IRBMED), review number HUM00033029.</p>
      </sec>
      <sec>
        <title>Overview</title>
        <p>We must construct models of a target system and an external attack to evaluate the strength of privacy protection mechanisms for guarding against actual privacy attacks. The <italic>Target System</italic> is a prediction model existing on a central server that implements FL and DP in an attempt to protect against privacy threats. The <italic>External Attack</italic> simulates an adversarial actor intent on uncovering private information about individuals in the <italic>Target System</italic>. Enacting the <italic>External Attack</italic> against the <italic>Target System</italic> allows us to assess the performance of the <italic>Target System</italic> in terms of model utility and privacy protection.</p>
      </sec>
      <sec>
        <title>Data: Intern Health Study</title>
        <p>Data for this study were obtained from the 2017 to 2019 cohorts of the University of Michigan Intern Health Study (IHS) [<xref ref-type="bibr" rid="ref25">25</xref>]. IHS aims to investigate the biological and genetic factors affecting the relationship between stress and depression. The study followed medical interns at several dozen facilities in the United States and China. In addition to providing demographic information at the beginning of the study, participants were asked to wear a Fitbit device during their internship and complete daily mood ecological momentary assessments (EMAs) through a mobile app.</p>
        <p>The data originally covered 6660 registered participants with 1,241,629 daily sensor observations. After data cleaning (see <xref ref-type="table" rid="table1">Table 1</xref> for details), our final data set contained 4274 participants and 596,585 daily sensor observations. Although the IHS data included information about participants’ medical internships (such as specialty), we excluded this information to better simulate an mHealth system that would be used with the general population. The pertinent demographic features of the data are given in <xref ref-type="table" rid="table1">Tables 1</xref> and <xref ref-type="table" rid="table2">2</xref>, whereas <xref ref-type="table" rid="table3">Table 3</xref> shows the summary statistics for participant age, daily mood, and daily sensor data. <xref ref-type="table" rid="table4">Table 4</xref> summarizes the degree of missingness for each sensor measurement for all the daily observations.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Participants and sensor data by cohort year.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="290"/>
            <col width="160"/>
            <col width="150"/>
            <col width="200"/>
            <col width="200"/>
            <thead>
              <tr valign="top">
                <td>Cohort year</td>
                <td>Participants (raw data; n=6660), n</td>
                <td>Participants (clean data<sup>a</sup>; n=4274), n</td>
                <td>Daily sensor observations (raw data; n=1,235,543<sup>b</sup>), n</td>
                <td>Daily sensor observations (clean data; n=596,585), n</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>2017</td>
                <td>2846</td>
                <td>531</td>
                <td>119,850</td>
                <td>83,999</td>
              </tr>
              <tr valign="top">
                <td>2018</td>
                <td>2129</td>
                <td>2098</td>
                <td>576,535</td>
                <td>277,754</td>
              </tr>
              <tr valign="top">
                <td>2019</td>
                <td>1685</td>
                <td>1645</td>
                <td>539,158</td>
                <td>234,832</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>Participants with no available sensor data and sensor observations not linked to the registered participants were removed. We also excluded 40 participants with invalid or missing age values, 2 participants with missing sex values, and 26 participants with missing ethnicity values. Sensor observations with no mood scores were excluded.</p>
            </fn>
            <fn id="table1fn2">
              <p><sup>b</sup>This number is slightly less than the original data set size because of observations not linked to registered participants.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Breakdown of participants by ethnicity and gender (clean data).</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="400"/>
            <col width="230"/>
            <col width="370"/>
            <thead>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Male (n=1896), n (%)</td>
                <td>Female (n=2378), n (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Arab or Middle Eastern</td>
                <td>40 (2.11)</td>
                <td>37 (1.56)</td>
              </tr>
              <tr valign="top">
                <td>Asian (eg, Indian or Chinese)</td>
                <td>456 (24.05)</td>
                <td>524 (22.04)</td>
              </tr>
              <tr valign="top">
                <td>Black or African American</td>
                <td>73 (3.85)</td>
                <td>148 (6.22)</td>
              </tr>
              <tr valign="top">
                <td>Latino or Hispanic</td>
                <td>87 (4.59)</td>
                <td>81 (3.41)</td>
              </tr>
              <tr valign="top">
                <td>Multiracial</td>
                <td>156 (8.23)</td>
                <td>223 (9.38)</td>
              </tr>
              <tr valign="top">
                <td>Native American</td>
                <td>1 (0.05)</td>
                <td>3 (0.13)</td>
              </tr>
              <tr valign="top">
                <td>Other</td>
                <td>10 (0.53)</td>
                <td>8 (0.34)</td>
              </tr>
              <tr valign="top">
                <td>Pacific Islander</td>
                <td>1 (0.05)</td>
                <td>0 (0)</td>
              </tr>
              <tr valign="top">
                <td>White</td>
                <td>1072 (56.54)</td>
                <td>1354 (56.94)</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Descriptive statistics of daily sensor data and participant age (clean data).</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="420"/>
            <col width="150"/>
            <col width="430"/>
            <thead>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Values, mean (SD)</td>
                <td>Values, median (range)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Participant age (years)</td>
                <td>27.66 (2.62)</td>
                <td>27 (17-47)</td>
              </tr>
              <tr valign="top">
                <td>Mood ecological momentary assessment score</td>
                <td>7.27 (1.63)</td>
                <td>7 (1-10)</td>
              </tr>
              <tr valign="top">
                <td>In-bed minutes</td>
                <td>417.98 (137.59)</td>
                <td>437 (4-1379)</td>
              </tr>
              <tr valign="top">
                <td>Sleep minutes</td>
                <td>372.11 (123.81)</td>
                <td>389 (0-1228)</td>
              </tr>
              <tr valign="top">
                <td>Active minutes</td>
                <td>63.83 (92.6)</td>
                <td>25 (0-1016)</td>
              </tr>
              <tr valign="top">
                <td>Step count</td>
                <td>8819 (4739)</td>
                <td>8215 (1-70,138)</td>
              </tr>
              <tr valign="top">
                <td>Resting heart rate (beats/min)</td>
                <td>62.83 (7.63)</td>
                <td>63 (39-100)</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <table-wrap position="float" id="table4">
          <label>Table 4</label>
          <caption>
            <p>Daily sensor data missingness (clean data).</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="330"/>
            <col width="240"/>
            <col width="430"/>
            <thead>
              <tr valign="bottom">
                <td>
                  <break/>
                </td>
                <td>Days missing<sup>a</sup>, (n=596,585), n (%)</td>
                <td>Days of data per participant, mean (SD)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>In-bed minutes</td>
                <td>259,633 (43.52)</td>
                <td>78.84 (99.70)</td>
              </tr>
              <tr valign="top">
                <td>Sleep minutes</td>
                <td>259,633 (43.52)</td>
                <td>78.84 (99.70)</td>
              </tr>
              <tr valign="top">
                <td>Active minutes</td>
                <td>351,181 (58.87)</td>
                <td>57.42 (69.56)</td>
              </tr>
              <tr valign="top">
                <td>Step count</td>
                <td>186,189 (31.21)</td>
                <td>96.02 (107.44)</td>
              </tr>
              <tr valign="top">
                <td>Resting heart rate</td>
                <td>323,722 (54.26)</td>
                <td>63.84 (77.52)</td>
              </tr>
              <tr valign="top">
                <td>Combined features<sup>b</sup></td>
                <td>428,692 (71.86)</td>
                <td>39.28 (61.77)</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table4fn1">
              <p><sup>a</sup>Missingness was measured relative to the baseline obtained after removing all days with missing mood scores and all participants with missing demographic features or no sensor data.</p>
            </fn>
            <fn id="table4fn2">
              <p><sup>b</sup>A nonmissing day for this row is any day with all 5 sensor measurements recorded.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Data Preprocessing</title>
        <p>The most important step in preparing the IHS data for analysis was to deal with missing data. Although a typical medical internship lasted 12 months, the average participant in the study logged a bit more than 1 month (not necessarily continuous) of complete sensor data. Given our objective of building a real-world mHealth system, we could not exclude all observations with any missing features. Furthermore, exploratory tests demonstrated that neural network classifiers trained on only complete cases are not useful predictors of mood scores. Therefore, we imputed missing sensor features using 10 iterations of multivariate imputation by chained equations because of its flexibility and relatively low computational cost [<xref ref-type="bibr" rid="ref26">26</xref>]. We acknowledge that true FL would require us to impute data locally at the participant level, but data availability considerations led us to adopt a centralized imputation approach.</p>
        <p>Our feature set included the 9 features (cohort year, age, sex, ethnicity, and 5 sensor measurements) listed in the previous section, as well as 1-day and 2-day lags for each of the 5 sensor measurements and the mood score. Lags account for the possibility that certain sensor events (such as one night of poor sleep) may have a delayed impact on mood. In addition, we included 3 time-based features indicating the observation’s day of week, day of month, and day of year to account for unmeasured factors that may be correlated with time. Finally, we performed common preprocessing steps on our feature set, such as standardizing continuous features and splitting categorical features into component binary features.</p>
      </sec>
      <sec>
        <title>Target System Construction</title>
        <p>In the Target System, the statistical models live on the central server and use data from individual IHS participant devices for training. We formulated both regression and classification tasks for the Target System and trained models for each using our sensor and demographic data. These 2 tasks provide robustness in assessing the effectiveness of our privacy protection measures. The regression task predicts mood scores on the 1- to 10-point scale, whereas the binary classification task predicts whether the user’s mood has improved from the previous day.</p>
        <p>As we did not consider model interpretability in our analysis, we implemented neural networks for both tasks because of their flexibility and accuracy advantages over other machine learning methods. We also considered the fact that future mHealth systems open to the public are likely to collect data on a far larger scale than that obtained in the IHS. Although a full investigation of all possible model types for the Target System is outside the scope of this analysis, we presented comparisons with simpler linear methods (ordinary least squares for the regression task and logistic regression for the binary classification task) to justify our choice of neural networks. The details of the neural network implementation are provided in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>. This paper reports the results from the regression task and leaves the results from the binary prediction task in <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>.</p>
      </sec>
      <sec>
        <title>External Attack: Underlying Assumptions</title>
        <p>Several methods can be designed to compromise user privacy through the Target System. For example, attackers interested in whether a specific user or record appears in the Target System may mount a <italic>membership inference attack</italic>, whereas those interested in ascertaining certain statistical properties of the private training data (either globally or on a per-user basis) may mount a <italic>property inference attack</italic>. Other attackers may attempt <italic>data reconstruction attacks</italic>, which aim to reconstruct partial or complete records from the original training data [<xref ref-type="bibr" rid="ref23">23</xref>]. Our attack model is derived from domain-specific considerations regarding the environment of mHealth applications, the privacy demands of system users, and the identities of possible attackers.</p>
        <p>In mHealth systems containing a large portion of the general population, membership inference has limited value, and the reconstruction of a specific user’s data is quite difficult. Therefore, we assume that the attacker is interested in property inference on individual user data, particularly whether an IHS participant has an average daily mood EMA score higher than the global average daily mood EMA score (denoted hereafter by <italic>mood status</italic>). Such inferences are relatively easy to execute and could expose sensitive information about the participants; for instance, those with consistently low mood scores may be at a higher risk of depression or other mental disorders. Regardless of the attacker’s exact purposes, the mere possibility of a successful inference of mood status may significantly undermine public trust in the mHealth system.</p>
        <p>The literature on privacy attacks provides 2 broad dimensions along which a privacy threat may be assessed based on the attacker’s resources. The first dimension is the attacker’s level of access to the Target System and any associated privacy protection systems. The literature often differentiates between <italic>black box</italic> model access, where the attacker is limited to viewing only the Target System’s model output for a given input, and <italic>white box</italic> access, where the attacker is able to view the model architecture and all related parameters, along with the details of any privacy protection mechanisms. The second dimension is the attacker’s ability to alter the model parameters to their liking. Certain attacks can be implemented passively, meaning that the attacker can compromise user privacy by simply observing changes to the Target System’s statistical model that occur during training. Others require the attacker to actively influence the model parameters, usually by injecting customized training data into the system.</p>
        <p>We assumed that the attacker has white box access to the central server of the Target System (including the statistical model and all communications with the server) but can only carry out privacy attacks passively. We also assumed that the attacker has no ability to access any live individual user devices in the Target System (any devices actively participating in model training). These assumptions match the <italic>rogue employee</italic> profile, an insider who is easily able to access confidential details about the Target System but would not be able to effect changes in model parameters without raising suspicion. This profile was selected to balance plausibility with preparation for a worst-case scenario. Although it is unlikely that any adversarial actor could influence the training process of the Target System, we believe that any serious actor would likely gain insider access.</p>
        <p>Implicit in this assumption is the ineffectiveness of other commonly used privacy-preserving technologies outside FL and DP against this type of threat. We assumed that privacy policies and operational protocols would be of limited use against bad faith actors, whereas encryption and authentication technologies would not stop an attacker with insider access. This is not to say that such tools are not useful for ensuring user privacy and building trust. However, for the simplicity of this analysis, we assumed that such conventional methods are not effective and focus on the ability of FL and DP to stop such an attacker. We elaborate on the reasonableness of this assumption and implications for future research in the Discussion section.</p>
      </sec>
      <sec>
        <title>External Attack: Implementation</title>
        <sec>
          <title>Overview</title>
          <p>The implementation details of this attack follow those in the study by Melis et al [<xref ref-type="bibr" rid="ref24">24</xref>]. The attack was conducted in the 3 stages described in the following sections. For this analysis, we assumed that the statistical model in the Target System is trained using FL and local DP, implying that individual user devices do not pool their data centrally and instead add noise to their gradient updates before sending them to the central server. Other than the gradient updates, all user data, including sensor measurements, demographic data, and mood EMA scores, remain strictly on individual user devices. Additional details can be found in the <italic>Implementing FL and DP</italic> section.</p>
        </sec>
        <sec>
          <title>Stage 1: Accessing Target Model Parameters</title>
          <p>Given that the attacker has full access to all information stored on the central server at training time, we assumed that they observe the Target System model parameters at time <italic>t</italic> (given by <italic>θ<sub>t</sub></italic>) as well as the gradient updates (given by ∇<sub>i,t</sub>) with respect to <italic>θ<sub>t</sub></italic> sent to the server from individual user devices <italic>A<sub>i</sub></italic>. The relationship between these variables is shown on the left side of <xref rid="figure1" ref-type="fig">Figure 1</xref>.</p>
          <fig id="figure1" position="float">
            <label>Figure 1</label>
            <caption>
              <p>Attacker infiltration of central server.</p>
            </caption>
            <graphic xlink:href="jmir_v25i1e43664_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
          <p>The attacker first copies the entire Target Model (including parameters <italic>θ<sub>t</sub></italic>) and the gradient updates ∇<sub>i,t</sub>. The attacker ultimately attempts to predict the mood status of each user <italic>A<sub>i</sub></italic>. Note that under DP, the gradient updates ∇<sub>i,t</sub> observed by the attacker would include any noise added to the true model gradient on the user device.</p>
        </sec>
        <sec>
          <title>Stage 2: Constructing the Attacker’s Data Set</title>
          <p>To predict the mood status of live users, the attacker requires an auxiliary collection of users for whom the mood status is known, and model gradients with respect to <italic>θ<sub>t</sub></italic> can be calculated. We assumed that attackers could access such a data set because they already have access to the central server. These data could be sourced from central server databases containing raw data for pilot users, a small number of previously compromised user devices, an externally published data set, or even users working in collusion with the attacker. The process of using these data to construct the attacker’s data set is shown in <xref rid="figure2" ref-type="fig">Figure 2</xref>. In particular, each user in the auxiliary data has a known mood status and generates a gradient ∇’<sub>i,t</sub> with respect to the same Target Model parameter <italic>θ<sub>t</sub></italic>. This gradient contains the same amount of noise as the observed gradient updates from live users because we assumed that the parameters used for privacy protection in the Target System are stored on the central server. We reasoned that this assumption is feasible for systems implementing FL and DP because they cannot broadcast the relevant parameters to all users while hiding them from adversaries with access to the central server. This increases the likelihood of a successful inference relative to a scenario in which the noise parameter is unknown.</p>
          <fig id="figure2" position="float">
            <label>Figure 2</label>
            <caption>
              <p>Constructing the attacker’s data set.</p>
            </caption>
            <graphic xlink:href="jmir_v25i1e43664_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
        </sec>
        <sec>
          <title>Stage 3: Mood Status Prediction</title>
          <p>Finally, the records in the attacker’s data set for which mood status is observed can be used to train a binary batch property classifier, which predicts mood status (labels) from the provided gradients (features). This process is illustrated in <xref rid="figure3" ref-type="fig">Figure 3</xref>. Any machine learning model can be used in this step, but we adopted a neural network approach following the example in a study by Melis et al [<xref ref-type="bibr" rid="ref24">24</xref>]. The details of the implementation are provided in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p>
          <fig id="figure3" position="float">
            <label>Figure 3</label>
            <caption>
              <p>External attack training and final inference.</p>
            </caption>
            <graphic xlink:href="jmir_v25i1e43664_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
        </sec>
      </sec>
      <sec>
        <title>Implementing FL and DP</title>
        <p>Our proposed defense against external attacks uses local DP techniques in conjunction with FL to mask the information present in gradient updates communicated to the central server. The specific implementation follows the procedure given in Naseri et al [<xref ref-type="bibr" rid="ref23">23</xref>]. The general algorithm is illustrated in <xref rid="figure4" ref-type="fig">Figure 4</xref>.</p>
        <fig id="figure4" position="float">
          <label>Figure 4</label>
          <caption>
            <p>Pseudocode for privacy protection implementation. DP: differential privacy; FL: federated learning.</p>
          </caption>
          <graphic xlink:href="jmir_v25i1e43664_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>Our implementation of FL randomly sampled 30% of the study participants in each iteration to participate in the corresponding model update (set <italic>A</italic>). Our procedure is very similar to that of vanilla FL and differs only in the second and third steps (indicated with a star) of the inner loop, which are described as follows:</p>
        <list list-type="order">
          <list-item>
            <p>Clip each computed gradient to have <italic>L<sub>2</sub></italic> norm of at most <italic>C</italic>.</p>
          </list-item>
          <list-item>
            <p>Add gaussian noise with mean 0 and variance <italic>σ<sup>2</sup>C<sup>2</sup></italic> to each gradient component independently.</p>
          </list-item>
        </list>
        <p>The first step ensures that the signals within the gradient updates do not overpower the noise, whereas the second step provides the stochastic component that addresses information leakage resulting from deterministic gradient calculations. For this analysis, we set <italic>C=1</italic> for all cases and varied the <italic>σ</italic> parameter to achieve different privacy protection levels.</p>
        <p>Much of the existing literature on DP reported explicit ε and <italic>δ</italic> values based on the training process used for the Target System (including any noise added) as well as the properties of the training data. Although it is certainly possible to report our level of privacy protection in terms of ε (after fixing a value for <italic>δ</italic>), we chose to report <italic>σ</italic> (the noise scale) directly for a variety of reasons. The ε-<italic>δ</italic> guarantees of DP do not directly apply to property inference; hence, we adopted a simulation-based approach to measuring the true level of privacy protection. The other main role of ε and <italic>δ</italic> (communicating the degree of perturbation applied to the training process) could also be served by reporting the more interpretable noise scale parameter <italic>σ</italic>. We varied only the <italic>σ</italic> parameter in the course of the analysis, creating an effective one-to-one correspondence between ε and <italic>σ</italic> values. Given that the main stakeholders for large-scale mHealth systems are health care practitioners and the public, we believed that the interpretability of our analysis should take precedence over theoretical rigor.</p>
        <p>Other parameters remain identical to those outlined in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> for the Target System.</p>
      </sec>
      <sec>
        <title>Performance Versus Privacy Protection: A Simulation Approach</title>
        <sec>
          <title>Overview</title>
          <p>In general, it is difficult to quantify the effectiveness of an <italic>External Attack</italic> against the <italic>Target System</italic>; therefore, we used a simulation-based approach to report results for several different attacker settings. We varied σ, the noise parameter in constructing the stochastic gradients, to test different levels of privacy protection. We also modeled differences in the attacker’s access to auxiliary user data by changing <italic>q</italic>, the proportion of IHS study participants with compromised devices whose data are available to the attacker. <xref rid="figure5" ref-type="fig">Figure 5</xref> shows the basic simulation setup used in the analyses.</p>
          <fig id="figure5" position="float">
            <label>Figure 5</label>
            <caption>
              <p>Pseudocode for our assessment of external attack effectiveness. DP: differential privacy.</p>
            </caption>
            <graphic xlink:href="jmir_v25i1e43664_fig5.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
        </sec>
        <sec>
          <title>Measuring Costs of Privacy Protection</title>
          <p>Higher levels of privacy protection generally hinder the ability of the Target System to train the model effectively (because the gradients have more added noise). We measured this cost in terms of the training time and final model utility for different values of σ. To facilitate comparability, we initialized the Target Model identically for each case and seeded all stochastic components in the training process (selection of clients and network dropout) identically.</p>
        </sec>
        <sec>
          <title>Measuring Effectiveness of Privacy Protection</title>
          <p>We measured the effectiveness of our privacy protection procedure by the <italic>External Attack</italic>’s ability to infer the mood status of noncompromised users. Designating a fixed subset of such users as a test set, we recorded the maximum test area under the curve achieved by the attacker’s classifier <italic>at any point during its training process (up to 600 epochs)</italic>. Although the attacker theoretically does not have access to the testing labels, this procedure provides an experimental upper bound on their performance. We also calculated the model sensitivity and positive predictive value (PPV) as more easily interpretable metrics. All metrics are calculated for each choice of σ and <italic>q</italic>. To facilitate comparability, all evaluations were performed with the attacker using the same Target Model parameters produced after 20 training epochs to generate their gradient data set. All the other stochastic components were seeded identically for each evaluation.</p>
        </sec>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Target Model Training Metrics</title>
        <p><xref rid="figure6" ref-type="fig">Figure 6</xref> plots the Target System’s model loss and accuracy metrics over each gradient update. Training progress under the conventional centralized training protocol is also plotted for reference. A σ value of 0 denotes FL with no DP implementation. We saw a clear effect of both FL and DP on the training process for the Target System’s statistical model. Relative to the centralized protocol, FL leads to a noticeable increase in training time, but the final model accuracy seems to be very similar (at least within the given training window). The addition of noise via DP seems to substantially affect both training time and final model accuracy. Interestingly, training loss seems to begin increasing at some point in the training process when additional noise is added, potentially indicating shortcomings in conventional optimization techniques when using both FL and DP. <xref rid="figure7" ref-type="fig">Figure 7</xref> shows the relationship between training time, final model utility, and noise scale, where training time is defined as the number of gradient updates needed for model loss to fall within 1% of the minimum loss over 1000 updates.</p>
        <fig id="figure6" position="float">
          <label>Figure 6</label>
          <caption>
            <p>Target system model performance. In all simulations, training runs for at most 1000 epochs.</p>
          </caption>
          <graphic xlink:href="jmir_v25i1e43664_fig6.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure7" position="float">
          <label>Figure 7</label>
          <caption>
            <p>Training time or final model utility versus noise scale.</p>
          </caption>
          <graphic xlink:href="jmir_v25i1e43664_fig7.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Privacy Protection Metrics</title>
        <p><xref rid="figure8" ref-type="fig">Figure 8</xref> shows the success of the <italic>External Attack</italic> in inferring a participant’s mood status based on the participant’s observed gradient with respect to the fixed <italic>Target System</italic> model parameters. Immediately, we noted that FL alone (0 noise) is insufficient to protect against this type of attack. Even if the attacker has access to labeled data for only 10 participants (of 4274 total participants), they can successfully infer the mood status of a large majority of all other participants. However, adding noise to the gradient updates significantly decreases the attacker’s performance even when they have access to a large amount of labeled training data. <xref rid="figure9" ref-type="fig">Figure 9</xref> shows similar results for the attack’s PPV and sensitivity, given that a low mood status is viewed as a positive test result.</p>
        <p>Combined with the training metrics from the previous section, we can begin to assign concrete trade-offs between model utility and privacy protection in the context of this particular Target Model. For example, if the attacker has access to 100 participants’ labeled training data, setting the noise scale to 0.1 increases the training time by 26.2%, decreases the final model <italic>R</italic><sup>2</sup> by 11.5%, and decreases the privacy attack’s PPV by 17% relative to FL with no additional noise. Increasing the noise scale to 0.2 increases training time by 40.7%, decreases final model <italic>R</italic><sup>2</sup> by 17.3%, and decreases the PPV of privacy attack by 36.4% compared with FL with no noise.</p>
        <fig id="figure8" position="float">
          <label>Figure 8</label>
          <caption>
            <p>Attacker success versus noise scale. AUC: area under the curve; DP: differential privacy; FL: federated learning.</p>
          </caption>
          <graphic xlink:href="jmir_v25i1e43664_fig8.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure9" position="float">
          <label>Figure 9</label>
          <caption>
            <p>Attack PPV or sensitivity versus noise scale. PPV: positive predictive value.</p>
          </caption>
          <graphic xlink:href="jmir_v25i1e43664_fig9.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Vulnerability Metrics</title>
        <p>This section analyzes the benefits of our privacy protection procedure for various subgroups within our data. Although we should strive to maximize privacy protection within reason for all members of the population, it is instructive to examine who is most at risk from this privacy threat and hence who benefits the most from these privacy protection mechanisms.</p>
        <p><xref rid="figure10" ref-type="fig">Figure 10</xref> shows the correlation between the attacker’s prediction of a particular participant’s mood status and the participant’s actual mood status. The y-axis shows the attacker’s predicted probability of each participant having a high mood status, where values above 0.5 indicate participants ultimately classified as having a high mood status. As expected, there was a strong positive association between the attacker’s prediction and the participant’s actual average mood when no additional noise was added to the FL gradient updates. This indicates that participants with average mood scores that were much lower than the global average were simultaneously at a much higher risk of a successful inference attack. As privacy concerns are usually greatest for those with sensitive health conditions, this further underscores the insufficiency of FL alone. Fortunately, the addition of sufficient noise can eliminate this correlation, neutralizing privacy risks regardless of the participant’s mood tendencies. Similar analyses of privacy risk across gender, ethnicity, and age groups did not yield noteworthy findings. The associated figures can be found in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p>
        <fig id="figure10" position="float">
          <label>Figure 10</label>
          <caption>
            <p>Attacker’s predicted mood status versus participant average mood for different differential privacy (DP) noise parameters.</p>
          </caption>
          <graphic xlink:href="jmir_v25i1e43664_fig10.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>Our results showed that FL combined with DP substantially reduced the attacker’s ability to infer the participant mood status in our simulation. The 3 attacker success metrics we report (area under the curve, PPV, and sensitivity) all approach 0.5 with increased noise, meaning that the attacker only performs slightly better than a random guess in these situations. In addition, the attacker does not perform better when classifying those with extreme mood scores, protecting those who may be more vulnerable to an attack. This protection results in a decrease in the utility of the target model, but even in the worst-case scenario, <italic>R</italic><sup>2</sup> decreases by only 17.3% compared with FL without noise.</p>
        <p>In contrast to the studies by Naseri et al [<xref ref-type="bibr" rid="ref23">23</xref>] and Melis et al [<xref ref-type="bibr" rid="ref24">24</xref>], both of which found that FL and DP are insufficient to protect against this type of attack in other domains [<xref ref-type="bibr" rid="ref23">23</xref>,<xref ref-type="bibr" rid="ref24">24</xref>], we found that FL and DP could potentially provide a reasonable trade-off when used in a large-scale mHealth system based on health sensor data. A natural next step is to understand the reasons for this difference on a theoretical basis and explore how FL and DP fare in domains outside of those mentioned in this analysis. Nevertheless, these results underscored our belief that context is crucial in privacy research; there is no universal approach that will work in every setting.</p>
        <p>Our analysis addresses the challenge of quantifying and communicating the level of protection offered by privacy-preserving methods. We narrowed the scope of the <italic>External Attack</italic> and adopted a simulation-based approach to produce metrics that are both meaningful and easy to interpret. Our results underscore two important principles for future work on privacy protection: (1) FL, despite its simplicity and broad acceptance, is insufficient to protect against advanced privacy attacks on its own and (2) mHealth users whose health indicators deviate strongly from those of the general population (ie, those who most require privacy protection) are at a higher risk under our threat model. We must evaluate and improve existing tools for privacy protection and ensure that new approaches consider the individual needs and vulnerabilities of system users.</p>
      </sec>
      <sec>
        <title>Limitations of the Analysis</title>
        <p>It is possible that our implementation of the property inference attack may not exploit sufficient weaknesses in our Target System setup or our implementation of FL with local DP may not be perfectly optimized for our data set. These issues would result in model metrics that overstate the efficacy of our privacy protection measures or the cost of privacy protection. Future research could establish more credibility for implementing FL with DP by verifying our results using modified target systems and attacks. Experiments with variations of the FL and DP algorithms could also provide more favorable trade-offs against some or all external attacks.</p>
        <p>Our methods are also predicated on specific assumptions about the capabilities and resources of the attacker. We assumed the failure of more conventional privacy-preserving technologies to stop attackers. Although this may not be true in all real-world scenarios, the number of high-profile cybersecurity breaches in recent years justifies preparing for the worst-case scenario. Given sufficient incentive, it is likely that bad actors would attempt to procure the access and resources required to bypass conventional privacy measures and carry out our proposed attack. Future research should examine how FL and DP interact with other conventional privacy measures, especially if other technologies can compensate for the weaknesses of FL and DP.</p>
        <p>Missing data imputation is another area for future research on FL and DP implementation. Our imputation procedure is one of the main threats to the validity of our results, and it seems likely that future mHealth studies will suffer from similar problems. Although our analysis does not devote attention to a rigorous solution to this problem, we acknowledge that imputation methods deserve the same scrutiny for privacy risks as the other tools used for mHealth research.</p>
        <p>It is important to note that our simulation-based approach to measuring privacy protection sacrifices some statistical rigor in favor of interpretability. Unlike many DP implementations, we cannot guarantee the maximum bounds on the attacker’s probability of successful inference. It is not entirely clear how the mechanics of this attack change based on the Target System’s model architecture (neural network or otherwise), Target System’s classification task, attacker’s resources, attacker’s intended inference task, or properties of the underlying population. We hope that this work provides the foundation for the future development of numerical methods to approximate protection levels over a broader range of models and populations or even theoretical bounds on the likelihood of successful property inference in mHealth systems.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>It is our sincere hope that mHealth research will continue to generate robust tools for privacy protection along with novel statistical methodologies and technical improvements. The weaknesses of FL clearly demonstrate the dangers of complacency: threats will continue to evolve and our privacy protection technologies cannot fall behind. As mHealth applications continue to scale, safeguarding public trust must remain a top priority for researchers and practitioners alike.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Additional details and results for the regression task.</p>
        <media xlink:href="jmir_v25i1e43664_app1.docx" xlink:title="DOCX File, 279 KB"/>
      </supplementary-material>
      <supplementary-material id="app2">
        <label>Multimedia Appendix 2</label>
        <p>Results from the binary classification task.</p>
        <media xlink:href="jmir_v25i1e43664_app2.docx" xlink:title="DOCX File, 1330 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">AUC</term>
          <def>
            <p>area under the curve</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">DP</term>
          <def>
            <p>differential privacy</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">EMA</term>
          <def>
            <p>ecological momentary assessment</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">FL</term>
          <def>
            <p>federated learning</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">IHS</term>
          <def>
            <p>Intern Health Study</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">mHealth</term>
          <def>
            <p>mobile health</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">PPV</term>
          <def>
            <p>positive predictive value</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>This research was supported by the National Institutes of Health under awards R01MH101459 and R01MH131617.</p>
    </ack>
    <notes>
      <sec>
        <title>Data Availability</title>
        <p>The data sets generated and analyzed during this study are not publicly available because of considerations regarding participant privacy but are available from the corresponding author on reasonable request.</p>
        <p>The computer code that supports the findings of this study is available on reasonable request from the corresponding author, AS. The code is not publicly available because of considerations of participant privacy. However, a version of the code used in this study was developed for use with the publicly available Wearable Stress and Affect Detection (WESAD) data set [<xref ref-type="bibr" rid="ref27">27</xref>]. The code and instructions for downloading the WESAD data and running the experiments are available on the web [<xref ref-type="bibr" rid="ref28">28</xref>].</p>
      </sec>
    </notes>
    <fn-group>
      <fn fn-type="con">
        <p>AS planned and performed the analyses and produced the results presented in this manuscript. He also conducted background research necessary for simulating privacy attacks and implementing both FL and DP on the Target System. He is currently affiliated with Carnegie Mellon University; however, the research described in this paper was performed at the University of Michigan. LF assisted in the completion of the final manuscript, including reviewing, proofreading, compiling references, and ensuring the proper labeling of tables and figures. AT provided the initial idea for this analysis and pointed the team to relevant resources. He also provided frequent and impactful guidance during the course of the analysis, including reviews of the final manuscript. SS was the principal investigator of the University of Michigan Intern Health Study (IHS) and provided the data used in this analysis. He also provided input for the final manuscript.</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="web">
          <source>Research and Markets</source>
          <access-date>2022-08-26</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.researchandmarkets.com/reports/5124989/wearable-technology-market-size-share-and-trends">https://www.researchandmarkets.com/reports/5124989/wearable-technology-market-size-share-and-trends</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="web">
          <source>Grand View Research</source>
          <access-date>2022-08-26</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.grandviewresearch.com/industry-analysis/mhealth-market">https://www.grandviewresearch.com/industry-analysis/mhealth-market</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cao</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Lim</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Sengoku</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Guo</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Kodama</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Exploring the shift in international trends in mobile health research from 2000 to 2020: bibliometric analysis</article-title>
          <source>JMIR Mhealth Uhealth</source>
          <year>2021</year>
          <month>09</month>
          <day>08</day>
          <volume>9</volume>
          <issue>9</issue>
          <fpage>e31097</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://mhealth.jmir.org/2021/9/e31097/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/31097</pub-id>
          <pub-id pub-id-type="medline">34494968</pub-id>
          <pub-id pub-id-type="pii">v9i9e31097</pub-id>
          <pub-id pub-id-type="pmcid">PMC8459219</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kotz</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Gunter</surname>
              <given-names>CA</given-names>
            </name>
            <name name-style="western">
              <surname>Kumar</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Weiner</surname>
              <given-names>JP</given-names>
            </name>
          </person-group>
          <article-title>Privacy and security in mobile health: a research agenda</article-title>
          <source>Computer (Long Beach Calif)</source>
          <year>2016</year>
          <month>06</month>
          <volume>49</volume>
          <issue>6</issue>
          <fpage>22</fpage>
          <lpage>30</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/28344359"/>
          </comment>
          <pub-id pub-id-type="doi">10.1109/MC.2016.185</pub-id>
          <pub-id pub-id-type="medline">28344359</pub-id>
          <pub-id pub-id-type="pmcid">PMC5362254</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Auxier</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Rainie</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Anderson</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Perrin</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Kumar</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Turner</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Americans and privacy: concerned, confused and feeling lack of control over their personal information</article-title>
          <source>Pew Research Center</source>
          <year>2019</year>
          <month>11</month>
          <day>15</day>
          <access-date>2022-08-26</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.pewresearch.org/internet/2019/11/15/americans-and-privacy-concerned-confused-and-feeling-lack-of-control-over-their-personal-information/">https://www.pewresearch.org/internet/2019/11/15/americans-and-privacy-concerned-confused-and-feeling-lack-of-control-over-their-personal-information/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Guo</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>The privacy–personalization paradox in mHealth services acceptance of different age groups</article-title>
          <source>Electronic Commerce Res Appl</source>
          <year>2016</year>
          <month>03</month>
          <volume>16</volume>
          <fpage>55</fpage>
          <lpage>65</lpage>
          <pub-id pub-id-type="doi">10.1016/j.elerap.2015.11.001</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Koffi</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Yazdanmehr</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Mahapatra</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Mobile health privacy concerns - a systematic review</article-title>
          <source>Proceedings of AMCIS 2018</source>
          <year>2018</year>
          <conf-name>AMCIS 2018 Proceedings</conf-name>
          <conf-date>Aug 16-18, 2018</conf-date>
          <conf-loc>New Orleans, Louisiana, USA</conf-loc>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aisel.aisnet.org/amcis2018/Health/Presentations/25"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Brooks</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>3 Key Cybersecurity Trends To Know For 2021 (and On ...)</article-title>
          <source>Forbes</source>
          <year>2021</year>
          <month>04</month>
          <day>12</day>
          <access-date>2022-08-26</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.forbes.com/sites/chuckbrooks/2021/04/12/3-key-cybersecurity-trends-to-know-for-2021-and-on-/">https://www.forbes.com/sites/chuckbrooks/2021/04/12/3-key-cybersecurity-trends-to-know-for-2021-and-on-/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Domingo-Ferrer</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Blanco-Justicia</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Privacy-preserving technologies</article-title>
          <source>The Ethics of Cybersecurity</source>
          <year>2020</year>
          <publisher-loc>Cham</publisher-loc>
          <publisher-name>Springer International Publishing</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Lai</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Suda</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Civin</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Chandra</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>Federated learning with Non-IID data</article-title>
          <source>arXiv</source>
          <year>2018</year>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1806.00582"/>
          </comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.1806.00582</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>JC</given-names>
            </name>
            <name name-style="western">
              <surname>Goetz</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Sen</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Tewari</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Learning from others without sacrificing privacy: simulation comparing centralized and federated machine learning on mobile health data</article-title>
          <source>JMIR Mhealth Uhealth</source>
          <year>2021</year>
          <month>03</month>
          <day>30</day>
          <volume>9</volume>
          <issue>3</issue>
          <fpage>e23728</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://mhealth.jmir.org/2021/3/e23728/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/23728</pub-id>
          <pub-id pub-id-type="medline">33783362</pub-id>
          <pub-id pub-id-type="pii">v9i3e23728</pub-id>
          <pub-id pub-id-type="pmcid">PMC8044739</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dwork</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Roth</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>The algorithmic foundations of differential privacy</article-title>
          <source>Foundation Trend Theoretical Comput Sci</source>
          <year>2014</year>
          <volume>9</volume>
          <issue>3-4</issue>
          <fpage>211</fpage>
          <lpage>407</lpage>
          <pub-id pub-id-type="doi">10.1561/0400000042</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Abadi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Chu</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Goodfellow</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>McMahan</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Mironov</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Talwar</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Deep learning with differential privacy</article-title>
          <source>Proceedings of the 2016 ACM SIGSAC Conference on Computer and Communications Security</source>
          <year>2016</year>
          <conf-name>CCS'16: 2016 ACM SIGSAC Conference on Computer and Communications Security</conf-name>
          <conf-date>Oct 24-28, 2016</conf-date>
          <conf-loc>Vienna, Austria</conf-loc>
          <pub-id pub-id-type="doi">10.1145/2976749.2978318</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Brisimi</surname>
              <given-names>TS</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Mela</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Olshevsky</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Paschalidis</surname>
              <given-names>IC</given-names>
            </name>
            <name name-style="western">
              <surname>Shi</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>Federated learning of predictive models from federated Electronic Health Records</article-title>
          <source>Int J Med Inform</source>
          <year>2018</year>
          <month>04</month>
          <volume>112</volume>
          <fpage>59</fpage>
          <lpage>67</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/29500022"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.ijmedinf.2018.01.007</pub-id>
          <pub-id pub-id-type="medline">29500022</pub-id>
          <pub-id pub-id-type="pii">S1386-5056(18)30008-X</pub-id>
          <pub-id pub-id-type="pmcid">PMC5836813</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>GH</given-names>
            </name>
            <name name-style="western">
              <surname>Shin</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Federated learning on clinical benchmark data: performance assessment</article-title>
          <source>J Med Internet Res</source>
          <year>2020</year>
          <month>10</month>
          <day>26</day>
          <volume>22</volume>
          <issue>10</issue>
          <fpage>e20891</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2020/10/e20891/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/20891</pub-id>
          <pub-id pub-id-type="medline">33104011</pub-id>
          <pub-id pub-id-type="pii">v22i10e20891</pub-id>
          <pub-id pub-id-type="pmcid">PMC7652692</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dayan</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Roth</surname>
              <given-names>HR</given-names>
            </name>
            <name name-style="western">
              <surname>Zhong</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Harouni</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Gentili</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Abidin</surname>
              <given-names>AZ</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Costa</surname>
              <given-names>AB</given-names>
            </name>
            <name name-style="western">
              <surname>Wood</surname>
              <given-names>BJ</given-names>
            </name>
            <name name-style="western">
              <surname>Tsai</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Hsu</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>CK</given-names>
            </name>
            <name name-style="western">
              <surname>Ruan</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Kitamura</surname>
              <given-names>FC</given-names>
            </name>
            <name name-style="western">
              <surname>Lacey</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>de Antônio Corradi</surname>
              <given-names>GC</given-names>
            </name>
            <name name-style="western">
              <surname>Nino</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Shin</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Obinata</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Ren</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Crane</surname>
              <given-names>JC</given-names>
            </name>
            <name name-style="western">
              <surname>Tetreault</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Guan</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Garrett</surname>
              <given-names>JW</given-names>
            </name>
            <name name-style="western">
              <surname>Kaggie</surname>
              <given-names>JD</given-names>
            </name>
            <name name-style="western">
              <surname>Park</surname>
              <given-names>JG</given-names>
            </name>
            <name name-style="western">
              <surname>Dreyer</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Juluru</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Kersten</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Rockenbach</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Linguraru</surname>
              <given-names>MG</given-names>
            </name>
            <name name-style="western">
              <surname>Haider</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>AbdelMaseeh</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Rieke</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Damasceno</surname>
              <given-names>PF</given-names>
            </name>
            <name name-style="western">
              <surname>E Silva</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kawano</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Sriswasdi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Park</surname>
              <given-names>SY</given-names>
            </name>
            <name name-style="western">
              <surname>Grist</surname>
              <given-names>TM</given-names>
            </name>
            <name name-style="western">
              <surname>Buch</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Jantarabenjakul</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Tak</surname>
              <given-names>WY</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Kwon</surname>
              <given-names>YJ</given-names>
            </name>
            <name name-style="western">
              <surname>Quraini</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Feng</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Priest</surname>
              <given-names>AN</given-names>
            </name>
            <name name-style="western">
              <surname>Turkbey</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Glicksberg</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Bizzo</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>BS</given-names>
            </name>
            <name name-style="western">
              <surname>Tor-Díez</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Hsu</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Lai</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Hess</surname>
              <given-names>CP</given-names>
            </name>
            <name name-style="western">
              <surname>Compas</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Bhatia</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Oermann</surname>
              <given-names>EK</given-names>
            </name>
            <name name-style="western">
              <surname>Leibovitz</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Sasaki</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Mori</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Sohn</surname>
              <given-names>JH</given-names>
            </name>
            <name name-style="western">
              <surname>Murthy</surname>
              <given-names>KN</given-names>
            </name>
            <name name-style="western">
              <surname>Fu</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>de Mendonça</surname>
              <given-names>MR</given-names>
            </name>
            <name name-style="western">
              <surname>Fralick</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Kang</surname>
              <given-names>MK</given-names>
            </name>
            <name name-style="western">
              <surname>Adil</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Gangai</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Vateekul</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Elnajjar</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Hickman</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Majumdar</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>McLeod</surname>
              <given-names>SL</given-names>
            </name>
            <name name-style="western">
              <surname>Reed</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Gräf</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Harmon</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kodama</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Puthanakit</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Mazzulli</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>de Lavor</surname>
              <given-names>VL</given-names>
            </name>
            <name name-style="western">
              <surname>Rakvongthai</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>YR</given-names>
            </name>
            <name name-style="western">
              <surname>Wen</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Gilbert</surname>
              <given-names>FJ</given-names>
            </name>
            <name name-style="western">
              <surname>Flores</surname>
              <given-names>MG</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Q</given-names>
            </name>
          </person-group>
          <article-title>Federated learning for predicting clinical outcomes in patients with COVID-19</article-title>
          <source>Nat Med</source>
          <year>2021</year>
          <month>10</month>
          <day>15</day>
          <volume>27</volume>
          <issue>10</issue>
          <fpage>1735</fpage>
          <lpage>43</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/34526699"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41591-021-01506-3</pub-id>
          <pub-id pub-id-type="medline">34526699</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41591-021-01506-3</pub-id>
          <pub-id pub-id-type="pmcid">PMC9157510</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Can</surname>
              <given-names>YS</given-names>
            </name>
            <name name-style="western">
              <surname>Ersoy</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Privacy-preserving federated deep learning for wearable IoT-based biomedical monitoring</article-title>
          <source>ACM Trans Internet Technol</source>
          <year>2021</year>
          <month>02</month>
          <day>28</day>
          <volume>21</volume>
          <issue>1</issue>
          <fpage>1</fpage>
          <lpage>17</lpage>
          <pub-id pub-id-type="doi">10.1145/3428152</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ficek</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Dagne</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Daley</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Differential privacy in health research: a scoping review</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2021</year>
          <month>09</month>
          <day>18</day>
          <volume>28</volume>
          <issue>10</issue>
          <fpage>2269</fpage>
          <lpage>76</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/34333623"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamia/ocab135</pub-id>
          <pub-id pub-id-type="medline">34333623</pub-id>
          <pub-id pub-id-type="pii">6333353</pub-id>
          <pub-id pub-id-type="pmcid">PMC8449619</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Niinimäki</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Heikkilä</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Honkela</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Kaski</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Representation transfer for differentially private drug sensitivity prediction</article-title>
          <source>Bioinformatics</source>
          <year>2019</year>
          <month>07</month>
          <day>15</day>
          <volume>35</volume>
          <issue>14</issue>
          <fpage>i218</fpage>
          <lpage>24</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/31510659"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/bioinformatics/btz373</pub-id>
          <pub-id pub-id-type="medline">31510659</pub-id>
          <pub-id pub-id-type="pii">5529143</pub-id>
          <pub-id pub-id-type="pmcid">PMC6612875</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Qiu</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>DO</given-names>
            </name>
          </person-group>
          <article-title>Blockchain-enabled contextual online learning under local differential privacy for coronary heart disease diagnosis in mobile edge computing</article-title>
          <source>IEEE J Biomed Health Inform</source>
          <year>2020</year>
          <month>08</month>
          <volume>24</volume>
          <issue>8</issue>
          <fpage>2177</fpage>
          <lpage>88</lpage>
          <pub-id pub-id-type="doi">10.1109/jbhi.2020.2999497</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Choudhury</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Gkoulalas-Divanis</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Salonidis</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Sylla</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Park</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Hsu</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Das</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Differential privacy-enabled federated learning for sensitive health data</article-title>
          <source>arXiv</source>
          <year>2020</year>
          <pub-id pub-id-type="doi">10.48550/arXiv.1910.02578</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hsu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Gaboardi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Haeberlen</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Khanna</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Narayan</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Pierce</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Roth</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Differential privacy: an economic method for choosing epsilon</article-title>
          <source>Proceedings of the 2014 IEEE 27th Computer Security Foundations Symposium</source>
          <year>2014</year>
          <conf-name>2014 IEEE 27th Computer Security Foundations Symposium</conf-name>
          <conf-date>Jul 19-22, 2014</conf-date>
          <conf-loc>Vienna, Austria</conf-loc>
          <pub-id pub-id-type="doi">10.1109/csf.2014.35</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Naseri</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Hayes</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>De Cristofaro</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Local and central differential privacy for robustness and privacy in federated learning</article-title>
          <source>arXiv</source>
          <year>2020</year>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2009.03561"/>
          </comment>
          <pub-id pub-id-type="doi">10.14722/ndss.2022.23054</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Melis</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Song</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>De Cristofaro</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Shmatikov</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>Exploiting unintended feature leakage in collaborative learning</article-title>
          <source>Proceedings of the IEEE Symposium on Security and Privacy (SP)</source>
          <year>2019</year>
          <conf-name>IEEE Symposium on Security and Privacy (SP)</conf-name>
          <conf-date>May 19-23, 2019</conf-date>
          <conf-loc>San Francisco, CA, USA</conf-loc>
          <pub-id pub-id-type="doi">10.1109/sp.2019.00029</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sen</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kranzler</surname>
              <given-names>HR</given-names>
            </name>
            <name name-style="western">
              <surname>Krystal</surname>
              <given-names>JH</given-names>
            </name>
            <name name-style="western">
              <surname>Speller</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Chan</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Gelernter</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Guille</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>A prospective cohort study investigating factors associated with depression during medical internship</article-title>
          <source>Arch Gen Psychiatry</source>
          <year>2010</year>
          <month>06</month>
          <day>01</day>
          <volume>67</volume>
          <issue>6</issue>
          <fpage>557</fpage>
          <lpage>65</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/20368500"/>
          </comment>
          <pub-id pub-id-type="doi">10.1001/archgenpsychiatry.2010.41</pub-id>
          <pub-id pub-id-type="medline">20368500</pub-id>
          <pub-id pub-id-type="pii">2010.41</pub-id>
          <pub-id pub-id-type="pmcid">PMC4036806</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Azur</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Stuart</surname>
              <given-names>EA</given-names>
            </name>
            <name name-style="western">
              <surname>Frangakis</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Leaf</surname>
              <given-names>PJ</given-names>
            </name>
          </person-group>
          <article-title>Multiple imputation by chained equations: what is it and how does it work?</article-title>
          <source>Int J Methods Psychiatr Res</source>
          <year>2011</year>
          <month>03</month>
          <day>24</day>
          <volume>20</volume>
          <issue>1</issue>
          <fpage>40</fpage>
          <lpage>9</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/21499542"/>
          </comment>
          <pub-id pub-id-type="doi">10.1002/mpr.329</pub-id>
          <pub-id pub-id-type="medline">21499542</pub-id>
          <pub-id pub-id-type="pmcid">PMC3074241</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Schmidt</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Reiss</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Duerichen</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Marberger</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Van Laerhoven</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Introducing WESAD, a multimodal dataset for wearable stress and affect detection</article-title>
          <source>Proceedings of the 20th ACM International Conference on Multimodal Interaction</source>
          <year>2018</year>
          <conf-name>ICMI '18: International Conference on Multimodal Interaction</conf-name>
          <conf-date>Oct 16-20, 2018</conf-date>
          <conf-loc>Boulder, CO, USA</conf-loc>
          <pub-id pub-id-type="doi">10.1145/3242969.3242985</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="web">
          <article-title>ihs-privacy-paper</article-title>
          <source>GitHub</source>
          <access-date>2023-04-04</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://github.com/Alex-Shen-93/ihs-privacy-paper">https://github.com/Alex-Shen-93/ihs-privacy-paper</ext-link>
          </comment>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
