<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMIR</journal-id>
      <journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id>
      <journal-title>Journal of Medical Internet Research</journal-title>
      <issn pub-type="epub">1438-8871</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v22i12e22555</article-id>
      <article-id pub-id-type="pmid">33289676</article-id>
      <article-id pub-id-type="doi">10.2196/22555</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Web-Based Privacy-Preserving Multicenter Medical Data Analysis Tools Via Threshold Homomorphic Encryption: Design and Development Study</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>El Emam</surname>
            <given-names>Khaled</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Wu</surname>
            <given-names>Chih-Jen</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Essex</surname>
            <given-names>Aleksander</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Thaine</surname>
            <given-names>Patricia</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Lu</surname>
            <given-names>Yao</given-names>
          </name>
          <degrees>BSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-0261-0713</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Zhou</surname>
            <given-names>Tianshu</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-5858-6353</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Tian</surname>
            <given-names>Yu</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-6791-8217</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Zhu</surname>
            <given-names>Shiqiang</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-5687-4001</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Li</surname>
            <given-names>Jingsong</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Engineering Research Center of EMR and Intelligent Expert System</institution>
            <institution>Key Laboratory for Biomedical Engineering of Ministry of Education, College of Biomedical Engineering and Instrument Science</institution>
            <institution>Zhejiang University</institution>
            <addr-line>38 Zheda Road</addr-line>
            <addr-line>Hangzhou, 310027</addr-line>
            <country>China</country>
            <phone>86 571 87951564</phone>
            <email>ljs@zju.edu.cn</email>
          </address>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-1064-637X</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Engineering Research Center of EMR and Intelligent Expert System</institution>
        <institution>Key Laboratory for Biomedical Engineering of Ministry of Education, College of Biomedical Engineering and Instrument Science</institution>
        <institution>Zhejiang University</institution>
        <addr-line>Hangzhou</addr-line>
        <country>China</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Zhejiang Lab</institution>
        <addr-line>Hangzhou</addr-line>
        <country>China</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Jingsong Li <email>ljs@zju.edu.cn</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>12</month>
        <year>2020</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>8</day>
        <month>12</month>
        <year>2020</year>
      </pub-date>
      <volume>22</volume>
      <issue>12</issue>
      <elocation-id>e22555</elocation-id>
      <history>
        <date date-type="received">
          <day>16</day>
          <month>7</month>
          <year>2020</year>
        </date>
        <date date-type="rev-request">
          <day>10</day>
          <month>8</month>
          <year>2020</year>
        </date>
        <date date-type="rev-recd">
          <day>2</day>
          <month>10</month>
          <year>2020</year>
        </date>
        <date date-type="accepted">
          <day>6</day>
          <month>11</month>
          <year>2020</year>
        </date>
      </history>
      <copyright-statement>©Yao Lu, Tianshu Zhou, Yu Tian, Shiqiang Zhu, Jingsong Li. Originally published in the Journal of Medical Internet Research (http://www.jmir.org), 08.12.2020.</copyright-statement>
      <copyright-year>2020</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research, is properly cited. The complete bibliographic information, a link to the original publication on http://www.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="http://www.jmir.org/2020/12/e22555/" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Data sharing in multicenter medical research can improve the generalizability of research, accelerate progress, enhance collaborations among institutions, and lead to new discoveries from data pooled from multiple sources. Despite these benefits, many medical institutions are unwilling to share their data, as sharing may cause sensitive information to be leaked to researchers, other institutions, and unauthorized users. Great progress has been made in the development of secure machine learning frameworks based on homomorphic encryption in recent years; however, nearly all such frameworks use a single secret key and lack a description of how to securely evaluate the trained model, which makes them impractical for multicenter medical applications.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>The aim of this study is to provide a privacy-preserving machine learning (eg, logistic regression) protocol for multiple data providers and researchers. This protocol allows researchers to train models and then evaluate them on medical data from multiple sources while providing privacy protection for both the sensitive data and the learned model.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>We adapted a novel threshold homomorphic encryption scheme to guarantee privacy requirements. We devised new relinearization key generation techniques for greater scalability and multiplicative depth and new model training strategies for simultaneously training multiple models through x-fold cross-validation.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>Using a client-server architecture, we evaluated the performance of our protocol. The experimental results demonstrated that, with 10-fold cross-validation, our privacy-preserving logistic regression model training and evaluation over 10 attributes in a data set of 49,152 samples took approximately 7 minutes and 20 minutes, respectively.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>We present the first privacy-preserving multiparty logistic regression model training and evaluation protocol based on threshold homomorphic encryption. Our protocol is practical for real-world use and may promote multicenter medical research to some extent.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>machine learning</kwd>
        <kwd>confidentiality</kwd>
        <kwd>threshold homomorphic encryption</kwd>
        <kwd>logistic regression</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <sec>
        <title>Background</title>
        <p>In recent years, researchers have proposed strong requirements for the quality of medical research as it continues to progress, which has promoted the development of multicenter research. Compared with single-center research, multicenter research has many significant advantages, including enabling specific analyses for which no single institution has sufficient data, such as on a rare disease; providing medical data from different locations with diverse demographics, which increases the reproducibility and generalizability of the research; and generating pooled medical data that enables new discoveries that cannot be elucidated from any individual data set [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref2">2</xref>]. In addition, the development of multicenter medical research has accelerated the translation of research outcomes into clinical practice and strengthened collaborations among institutions [<xref ref-type="bibr" rid="ref2">2</xref>,<xref ref-type="bibr" rid="ref3">3</xref>].</p>
        <p>However, data sharing during multicenter research may increase privacy security risks. As medical data are highly sensitive, the leakage of sensitive information will lead to severe consequences, such as financial loss, social discrimination, and unauthorized data abuse, which can harm both patients and medical institutions [<xref ref-type="bibr" rid="ref4">4</xref>]. As a result, many medical institutions are unwilling to share their data despite the aforementioned benefits, which hinders the collaborative benefits of multicenter research. To solve this problem, a framework is urgently needed to support multicenter medical research efficiently while preventing the leakage of sensitive information.</p>
      </sec>
      <sec>
        <title>Prior Work</title>
        <p>Logistic regression is a widely used machine learning approach in various medical applications, such as prognostic prediction, disease diagnosis, and decision-making support [<xref ref-type="bibr" rid="ref5">5</xref>]. For example, Abdolmaleki et al [<xref ref-type="bibr" rid="ref6">6</xref>] used logistic regression to predict the outcome of biopsy in breast cancer and obtained 90% accuracy. Many solutions have been developed to address privacy-preserving logistic regression. Some use intermediary statistics to train a model without accessing the raw data; however, these methods remain vulnerable to statistical attack when a particular criterion holds true for only one sample [<xref ref-type="bibr" rid="ref7">7</xref>-<xref ref-type="bibr" rid="ref9">9</xref>]. Other researchers use homomorphic encryption to protect privacy during model training, which is similar to that used in this study [<xref ref-type="bibr" rid="ref10">10</xref>-<xref ref-type="bibr" rid="ref19">19</xref>]. Homomorphic encryption technology provides rigorous protection for sensitive information and enables the computation of information in an encrypted format and is, therefore, a potential candidate for secure logistic regression model training. However, unlike our solution, these homomorphic encryption–based solutions yield only sets of parameters, and there are no methods to evaluate the trained model in a secure manner. Furthermore, these methods use a single public and secret key, meaning that all the research data may be exposed to anyone who holds the secret key, limiting the application of these solutions in real-life scenarios. In the current literature, the works most similar to ours are those of Emam et al [<xref ref-type="bibr" rid="ref18">18</xref>] and Jiang et al [<xref ref-type="bibr" rid="ref19">19</xref>], which attempt to avoid information leak using methods that differ from ours. 
Emam et al [<xref ref-type="bibr" rid="ref18">18</xref>] kept the data local to the corresponding data providers and used the Paillier scheme to deal with intermediate values. However, because the public and secret keys are stored at the central unit, when multiple parties collude with the central unit, some meaningful information about the other parties’ sensitive data may be revealed to them [<xref ref-type="bibr" rid="ref18">18</xref>]. Jiang et al [<xref ref-type="bibr" rid="ref19">19</xref>] proposed a hybrid cryptographic method that uses a software guard extensions (SGX) enclave to securely generate and store the secret key in a trusted cloud. As the cloud server is shared among different users, it is more likely to be attacked. Considering the rapid development of attack methods toward SGX, including a recently proposed method capable of stealing the enclave secret to subvert the confidentiality of SGX, placement of the secret key in the cloud is not secure [<xref ref-type="bibr" rid="ref20">20</xref>]. Once the attackers break through the SGX’s guard, they will be able to obtain the secret key and decrypt all the sensitive information stored on the cloud, leading to a severe outcome.</p>
        <p>Multikey homomorphic encryption, first proposed by López-Alt et al [<xref ref-type="bibr" rid="ref21">21</xref>], allows computations on ciphertexts under different secret keys, which makes the method suitable for secure multicenter research. However, the scheme proposed in the study by López-Alt et al [<xref ref-type="bibr" rid="ref21">21</xref>] is based on the Nth degree truncated polynomial ring units cryptosystem, where if we obtain a result computed from ciphertexts under different keys, we will need to decrypt the result by the product of all involved secret keys, allowing for only a very limited number of parties before the decryption error grows too large to obtain the correct plaintext result. Another multikey homomorphic encryption method, called threshold homomorphic encryption, allows many more parties to participate without resulting in an excessively large decryption error; however, the noise generated in the relinearization is still very large and grows quadratically with the number of parties, which would have a negative effect on the multiplicative depth [<xref ref-type="bibr" rid="ref22">22</xref>].</p>
      </sec>
      <sec>
        <title>Objectives</title>
        <p>In this study, we propose a privacy-preserving multicenter research protocol using secure logistic regression, consisting of 3 primary entities: researchers, a service provider, and data providers, in which medical data are horizontally distributed. Our proposed protocol supports not only model training but also the evaluation of the trained model in a secure manner. The protocol guarantees the privacy of both the sensitive data for the data providers and the trained model for the researchers during model training and trained model evaluation. To satisfy privacy requirements, we apply threshold homomorphic encryption and propose a new relinearization key generation process that increases scalability and multiplicative depth. The proposed protocol has been implemented and tested with simulated real-life scenarios. The experimental results demonstrate that our protocol is efficient and practical for real-world applications.</p>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Overview of the Presented Protocol</title>
        <p>Our proposed protocol includes 3 primary entities as shown below. The architecture of the proposed protocol is shown in <xref rid="figure1" ref-type="fig">Figure 1</xref>.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>The architecture of the proposed protocol, containing 3 entities: data providers, a service provider, and researchers.</p>
          </caption>
          <graphic xlink:href="jmir_v22i12e22555_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <sec>
          <title>Data Providers</title>
          <p>These include institutions (eg, hospitals) that hold medical data and are willing to provide these data to the service provider for public use so long as the privacy of the data is preserved. To share medical data, the data providers must obtain patient consent if local law so requires. Upon receiving the researchers’ requests from the service provider, the data providers can decide whether to accept or refuse. To allow researchers to obtain correct research data, all data providers must implement data standardization to transform the data into a common format, such as the Observational Medical Outcomes Partnership common data model from the Observational Health Data Sciences and Informatics collaborative [<xref ref-type="bibr" rid="ref23">23</xref>].</p>
        </sec>
        <sec>
          <title>Service Provider</title>
          <p>This refers to an entity that (1) provides storage for encrypted data and research information, (2) performs the most computationally expensive part of the privacy-preserving logistic regression, and (3) performs information transfer among the data providers, the service provider, and the researchers. In addition, an interactive website is deployed by the service provider for researchers to conduct their studies in a secure manner and for data providers to authorize certain research requests.</p>
        </sec>
        <sec>
          <title>Researchers</title>
          <p>This includes the individuals or organizations who want to conduct research on multiple data providers’ data sets. Researchers submit their requests to the service provider, which are then sent to the data providers for further processing.</p>
          <p>As we use threshold homomorphic encryption to guarantee data and model security, in our proposed protocol, one public key corresponds to multiple secret keys, and different secret keys are distributed to different data providers and researchers. Furthermore, we assume that there exist at least one honest party and some semihonest adversaries that are capable of reading the internal information of the colluding parties while not deviating from the defined protocol [<xref ref-type="bibr" rid="ref24">24</xref>].</p>
        </sec>
      </sec>
      <sec>
        <title>Logistic Regression</title>
        <p>Logistic regression is a classification algorithm that is widely used in medicine, including for disease diagnosis, clinical decision support, and risk assessment. Suppose a data set consists of pairs (<italic>x<sub>i</sub></italic>, <italic>y<sub>i</sub></italic>), for <italic>i</italic>=1,...,<italic>N</italic>, where <italic>x<sub>i</sub></italic> denotes a vector of input features <italic>x<sub>i</sub></italic>=(<italic>x<sub>i</sub><sup>1</sup></italic>,...,<italic>x<sub>i</sub><sup>d</sup></italic>) and <italic>y<sub>i</sub></italic> is the class label. We then have:</p>
        <p>
          <disp-formula>
            <graphic xlink:href="jmir_v22i12e22555_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </disp-formula>
        </p>
        <p>In the sigmoid function σ(<italic>x<sub>i</sub><sup>T</sup>β</italic>), <italic>β</italic>=(<italic>β<sub>0</sub></italic>,<italic>β<sub>1</sub></italic>,...,<italic>β<sub>d</sub></italic>) are the model parameters. By training a logistic regression model through minimization of the following cost function, we can obtain the optimal model parameters:</p>
        <p>
          <disp-formula>
            <graphic xlink:href="jmir_v22i12e22555_fig5.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </disp-formula>
        </p>
      </sec>
      <sec>
        <title>Homomorphic Encryption</title>
        <p>Homomorphic encryption is a special type of encryption scheme that allows computations on ciphertexts without the need to access a secret key. Once the result of the computation is decrypted, it matches the result of the operations as if they were performed on the plaintext.</p>
        <p>In our proposed protocol, we use a ring learning with errors (RLWE)–based, somewhat homomorphic encryption scheme, called Brakerski/Fan-Vercauteren (BFV), which supports a limited number of additions and multiplications, to perform secure multiparty logistic regression [<xref ref-type="bibr" rid="ref25">25</xref>,<xref ref-type="bibr" rid="ref26">26</xref>]. The BFV scheme has some helpful properties for our protocol. First, it is more practical than the other 2 types of homomorphic encryption schemes, namely, partial and fully homomorphic encryption. More specifically, fully homomorphic encryption requires time-consuming bootstrapping to support an unlimited number of operations, whereas partial homomorphic encryption allows only addition or multiplication between ciphertexts. For example, the Paillier scheme only supports addition between ciphertexts, meaning that a ciphertext can only be multiplied by a plaintext, which results in massive transfer consumption if a large number of multiplications and the security of the plaintext are required [<xref ref-type="bibr" rid="ref27">27</xref>]. Furthermore, some optimization techniques can be used to greatly improve the computation performance in the BFV scheme as long as we set the encryption parameters properly, such as number theoretic transform (NTT) and Chinese remainder theorem (CRT) batching [<xref ref-type="bibr" rid="ref28">28</xref>]. Finally, the BFV scheme can be extended to threshold homomorphic encryption for secure multiparty computations.</p>
        <p>The details of the threshold variant of the BFV scheme are described as follows. The security and noise analysis of the scheme are provided in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> [<xref ref-type="bibr" rid="ref25">25</xref>,<xref ref-type="bibr" rid="ref29">29</xref>]:</p>
        <list list-type="order">
          <list-item>
            <p>setup(1<sup>λ</sup>): takes the security parameter λ as an input and returns the public parameterization param, including the degree of polynomial modulus n, the coefficient modulus q, the plaintext modulus t, and the (key, error) distribution (D1, D2).</p>
          </list-item>
          <list-item>
            <p>THE.keygenSP(param): the service provider samples a ← R<sub>q</sub> and outputs it. Here, R<sub>q</sub>=Z<sub>q</sub>[x]/(x<sup>n</sup>+1) is the ciphertext space of param.</p>
          </list-item>
          <list-item>
            <p>THE.keygenSkpk(param, a): each party p<sub>i</sub> samples s<sub>i</sub> ← D1, e<sub>i</sub> ← D2, sets s<sub>i</sub> as its secret key and outputs its public key pk<sub>i</sub>=[−(a · s<sub>i</sub>+e<sub>i</sub>)]<sub>q</sub>. Let subscript *<sub>co</sub> denote the combined key. The combined public key pk<sub>co</sub> among parties p<sub>1</sub>,...,p<sub>z</sub> is then computed as follows: <inline-graphic xlink:href="jmir_v22i12e22555_fig6.png" xlink:type="simple" mimetype="image"/></p>
          </list-item>
          <list-item>
            <p>THE.keygenRelin(param, s<sub>1</sub>,...,s<sub>z</sub>): the parties together with the service provider generate the combined relinearization key rlk<sub>co</sub>. As the generation of the relinearization key is rather complicated, we will show the details of this step later.</p>
          </list-item>
          <list-item>
            <p>THE.encrypt(m, pk<sub>co</sub>): This takes a polynomial m∈R<sub>t</sub> as the input, where R<sub>t</sub> is the plaintext space of the param. Let pk<sub>co</sub>=(pk<sub>co</sub>(0), pk<sub>co</sub>(1)) and Δ=⌊q/t⌋, and sample u ← D1 and (e<sub>1</sub>, e<sub>2</sub>) ← D2, then return:</p>
            <p>
              <disp-formula>
                <graphic xlink:href="jmir_v22i12e22555_fig7.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
              </disp-formula>
            </p>
          </list-item>
          <list-item>
            <p>THE.eval(C, rlk<sub>co</sub>, c<sub>1</sub>,...,c<sub>c</sub>): given a circuit C, a tuple of ciphertexts encrypted by the same public key, and the corresponding relinearization key, this outputs a ciphertext c<sub>out</sub>. The procedure for homomorphic addition and multiplication is the same as that in the original single-key BFV scheme.</p>
          </list-item>
          <list-item>
            <p>THE.decrypt(c, s<sub>1</sub>,...,s<sub>z</sub>): given the ciphertext c=(c(0), c(1)) encrypted by pk<sub>co</sub> and the corresponding secret keys, sample (e<sub>1</sub>, e<sub>z</sub>) ← D<sub>smg</sub>. Here, the subscript *<sub>smg</sub> means that the variance of the noise distribution is much larger than that of the input ciphertext noise distribution to guarantee circuit privacy through smudging techniques [<xref ref-type="bibr" rid="ref22">22</xref>]. The partial decryption shares are then computed as follows:</p>
            <p>
              <disp-formula>
                <graphic xlink:href="jmir_v22i12e22555_fig8.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
              </disp-formula>
            </p>
            <p>These shares are sent to the party that requires the unencrypted result. The decryption result <italic>m</italic> is obtained by</p>
            <p>
              <disp-formula>
                <graphic xlink:href="jmir_v22i12e22555_fig9.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
              </disp-formula>
            </p>
          </list-item>
        </list>
      </sec>
      <sec>
        <title>Workflow of the Presented Protocol</title>
        <p>The workflow of our proposed protocol consists of 5 major steps, as shown in <xref rid="figure2" ref-type="fig">Figure 2</xref>.</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Workflow of the proposed protocol.</p>
          </caption>
          <graphic xlink:href="jmir_v22i12e22555_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <sec>
          <title>Initialization of Encryption Parameters</title>
          <p>The service provider initializes the BFV homomorphic encryption parameters. These parameters should be carefully selected because they affect many aspects of the encryption scheme, such as operational performance, security level, multiplicative depth of the circuit, and space consumption. Two sets of parameters must be initialized by the service provider, one for the privacy-preserving logistic regression—<italic>param1</italic>=(<italic>n1</italic>, <italic>q1</italic>, <italic>t1</italic>, <italic>D1<sub>1</sub></italic>, <italic>D1<sub>2</sub></italic>) and the other for the generation of the relinearization key in a secure manner—<italic>param2</italic>=(<italic>n2</italic>, <italic>q2</italic>, <italic>t2</italic>, <italic>D2<sub>1</sub></italic>, <italic>D2<sub>2</sub></italic>). Once initialized, the 2 sets of parameters are sent to the data providers and researchers.</p>
          <p>To make the encryption scheme practical, these parameters should meet the following criteria. First, the degree of polynomial modulus <italic>n</italic> must be a power of 2. Second, the coefficient modulus and the plaintext modulus must be either a prime <italic>P</italic> that satisfies <italic>P</italic>=1 (mod 2<italic>n</italic>) or a composite number that is a product of distinct primes, where every prime satisfies the above condition. After setting appropriate encryption parameters, NTT can be used to accelerate the multiplications between polynomials from O(<italic>n<sup>2</sup></italic>) to O(<italic>n</italic> log <italic>n</italic>), whereas the adoption of CRT can improve the performance of the multiplications and additions of large integers, accelerating the multiplication and addition of the polynomials [<xref ref-type="bibr" rid="ref30">30</xref>]. More importantly, we can apply CRT batching to greatly reduce space and computational consumption. Given a certain degree of polynomial modulus <italic>n</italic>, we can pack up to <italic>n</italic> values into one polynomial using CRT batching and apply the arithmetic operations to all the values within this polynomial in a single instruction, multiple data (SIMD) manner, whereas in a naive manner, we place a single value into one polynomial and apply operations to only one value.</p>
          <p>Furthermore, to generate relinearization keys safely and correctly, the 2 sets of parameters must satisfy the following requirements: (1) their polynomial moduli must share the same degree and (2) the plaintext modulus in <italic>param2</italic> must be equal to the coefficient modulus in <italic>param1</italic>.</p>
        </sec>
        <sec>
          <title>Research Application</title>
          <p>The research application consists of several message transfers among the data providers, the service provider, and researchers. First, a researcher visits the website deployed by the service provider and sets up a new research study. When the research begins, 3 settings must be confirmed by the researcher: first, the query condition used to obtain the research data; second, the list of data providers from which the researcher wishes to obtain the research data; finally, the settings of the secure logistic regression, including the variables to be used as features and the variable to be used as a class label and the settings of the maximum number of iterations, learning rate, and termination condition of the model training. This information is stored in the database of the service provider and sent to the corresponding data providers as a research request. After receiving the request, the data providers decide whether to authorize this research and send their decision to the service provider to inform the corresponding researcher about the authorization status.</p>
        </sec>
        <sec>
          <title>Key Generation and Data Preparation</title>
          <p>Once the data providers complete the research authorization, key generation is implemented by an interactive protocol among all parties, which comprises 2 steps—THE.keygenSP and THE.keygenSkpk. After this procedure, each party <italic>p<sub>i</sub></italic> holds its secret keys <italic>s1<sub>i</sub></italic> and <italic>s2<sub>i</sub></italic>, whereas 2 corresponding public keys <italic>pk1<sub>co</sub></italic> and <italic>pk2<sub>co</sub></italic> are broadcast among all parties. Here, the number in the symbol represents the set of parameters to which these keys belong.</p>
          <p>The data preparation phase then begins, which is described as follows:</p>
          <list list-type="order">
            <list-item>
              <p>The data provider generates their own research data according to the query condition of the research. Next, all the floating-point numbers in the research data are scaled and rounded into integers because all the operations in the BFV scheme are integer based. Categorical features are encoded as integers if they are Boolean or ordered; otherwise, one-hot encoding is implemented.</p>
            </list-item>
            <list-item>
              <p>The data provider encodes the research data by CRT batching. As mentioned before, we can pack multiple values into one polynomial and apply operations to them in an SIMD manner via CRT batching. This means that when given a data set with d features and N samples, one can pack them into d+1 polynomials (d features and 1 class label) as long as the degrees of the polynomial moduli are larger than N.</p>
            </list-item>
            <list-item>
              <p>The data provider encrypts all the CRT-batched polynomials using the combined public key pk1<sub>co</sub>. After all the plaintext polynomials are encrypted, they are sent to the service provider.</p>
            </list-item>
          </list>
        </sec>
        <sec>
          <title>Relinearization Key Generation</title>
          <p>After data preparation, the researcher and all involved data providers, together with the service provider, generate the combined relinearization key. The relinearization step is not necessary for the correctness of homomorphic multiplication but is essential in our threshold-variant BFV scheme. By performing relinearization after every homomorphic multiplication, the size of the ciphertext can be strictly kept at 2, which simplifies decryption.</p>
          <p>The relinearization key generation procedure is illustrated next. We denote the number of parties by <italic>z</italic>. Suppose the coefficient modulus in <italic>param1</italic> is a product of <italic>k</italic> distinct primes, whereas each party <italic>p<sub>i</sub></italic> holds 2 secret keys <italic>s1<sub>i</sub></italic> and <italic>s2<sub>i</sub></italic> from <italic>param1</italic> and <italic>param2</italic>, respectively. Given a combined public key <italic>pk2<sub>co</sub></italic> from <italic>param2</italic>, the following is observed:</p>
          <list list-type="order">
            <list-item>
              <p>Each party p<sub>i</sub> performs THE.encrypt(s1<sub>i</sub>, pk2<sub>co</sub>) and outputs k ciphertexts, of which the plaintext modulus is a group of primes whose product is the coefficient modulus in param1. The ciphertexts of secret key c<sub>j</sub>(s1<sub>i</sub>) (j=1,...,k) are then sent to the service provider.</p>
            </list-item>
            <list-item>
              <p>The service provider computes the ciphertexts of the combined secret key c<sub>j</sub>(s1<sub>co</sub>) (j=1,...,k) and sends them to the data provider and researcher:</p>
              <p>
                <disp-formula>
                  <graphic xlink:href="jmir_v22i12e22555_fig10.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
                </disp-formula>
              </p>
            </list-item>
            <list-item>
              <p>Each party p<sub>i</sub> computes the ciphertexts of the product of the combined secret key and its secret key from <italic>param1</italic> as follows and sends the result to the service provider:</p>
              <p>
                <disp-formula>
                  <graphic xlink:href="jmir_v22i12e22555_fig11.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
                </disp-formula>
              </p>
              <p>Here, <italic>c<sub>j</sub></italic>(0) (<italic>j</italic>=1,...,<italic>k</italic>) are the ciphertexts of 0, which contain sufficiently large noise to guarantee function privacy [<xref ref-type="bibr" rid="ref31">31</xref>].</p>
            </list-item>
            <list-item>
              <p>The service provider computes the ciphertexts of the square of the combined secret key c<sub>j</sub>(s1<sub>co</sub><sup>2</sup>) (j=1,...,k) as follows:</p>
              <p>
                <disp-formula>
                  <graphic xlink:href="jmir_v22i12e22555_fig12.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
                </disp-formula>
              </p>
              <p>Having encrypted the combined secret key and its square, the service provider defines the decomposition bit count <italic>T</italic> and the size of the relinearization key <italic>L</italic>=⌊log<sub>2</sub>(<italic>q1</italic>)/<italic>T</italic>⌋, samples <italic>a<sub>0</sub></italic> ~ <italic>a<sub>L</sub></italic> ← <italic>R<sub>q1</sub></italic>, whereas each party <italic>p<sub>i</sub></italic> samples <italic>e<sub>i0</sub></italic> ~ <italic>e<sub>iL</sub></italic> ← <italic>D1<sub>2</sub></italic>, performs THE.encrypt(<italic>e<sub>i0</sub></italic> ~ <italic>e<sub>iL</sub></italic>, <italic>pk2<sub>co</sub></italic>) and sends these ciphertexts <italic>c<sub>j</sub></italic>(<italic>e<sub>i0</sub></italic> ~ <italic>e<sub>iL</sub></italic>) (<italic>j</italic>=1,...,<italic>k</italic>) to the service provider. After receiving encrypted noise, the service provider computes the following:</p>
              <p>
                <disp-formula>
                  <graphic xlink:href="jmir_v22i12e22555_fig13.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
                </disp-formula>
              </p>
              <p>The encrypted combined relinearization key is then generated as follows: all parties perform THE.decrypt(<italic>c<sub>j</sub></italic>(<italic>rlk<sub>co</sub></italic>), <italic>s2<sub>1</sub></italic>,...,<italic>s2<sub>z</sub></italic>) and finally return the plaintext combined relinearization key <italic>rlk<sub>co</sub></italic>. Compared with the combined relinearization key generation procedure presented in the study by Mouchet et al [<xref ref-type="bibr" rid="ref22">22</xref>], our method involves more transfer consumption but much less noise, which grows only linearly with the number of parties:</p>
              <p>
                <disp-formula>
                  <graphic xlink:href="jmir_v22i12e22555_fig14.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
                </disp-formula>
              </p>
            </list-item>
          </list>
        </sec>
        <sec>
          <title>Privacy-Preserving Model Training and Evaluation</title>
          <p>Secure logistic regression model training begins once all the encrypted research data and the combined relinearization key are sent to the service provider. We choose the gradient descent algorithm to train the model with homomorphically encrypted data because we can implement the algorithm using only addition and multiplication, which all fully and somewhat homomorphic encryption schemes naturally have, whereas despite its faster convergence, Newton's method requires matrix inversion, which may have a very high time cost under the homomorphic encryption computation [<xref ref-type="bibr" rid="ref32">32</xref>].</p>
          <p>After choosing the proper training method, another major problem is the evaluation of the sigmoid function σ(<italic>x<sup>T</sup>β</italic>), because the BFV scheme can only be used to evaluate polynomial functions. Instead of simply using the Taylor polynomial to approximate the sigmoid function, we use the degree-3 least squares approximation of the sigmoid function over the interval (−8, 8), as the former has a much larger error as &#124;<italic>x<sup>T</sup>β</italic>&#124; increases, whereas the latter only has a small error as long as <italic>x<sup>T</sup>β</italic> is within the interval [<xref ref-type="bibr" rid="ref13">13</xref>]. The least squares approximation polynomial is:</p>
          <p>
            <disp-formula>
              <graphic xlink:href="jmir_v22i12e22555_fig15.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
            </disp-formula>
          </p>
          <p>As the BFV scheme is based on integers, we apply a scaling factor (SF) to scale up the floating-point number <italic>x<sup>T</sup>β</italic> into the integer ⌊<italic>x<sup>T</sup>β</italic>×<italic>SF</italic>⌉. In our privacy-preserving logistic regression protocol, we set <italic>SF</italic>=1000, which is a trade-off between approximation accuracy and performance. Specifically, if we set <italic>SF</italic> smaller, the approximation accuracy will decrease; if we set <italic>SF</italic> larger, 2 or more polynomials may be required to represent a set of values, or larger encryption parameters may be required to maintain the same multiplicative depth for a given security level, both of which result in larger space and computational resource consumption. This SF also scales up the approximation interval from (−8, 8) to (−8000, 8000), scaling the degree-1 and degree-3 coefficients to 1/1000 and 1/1000<sup>3</sup>, respectively, of the original value. Finally, the least squares approximation function is integerized to be compatible with the homomorphic encryption computation:</p>
          <p>
            <disp-formula>
              <graphic xlink:href="jmir_v22i12e22555_fig16.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
            </disp-formula>
          </p>
          <p>The output of the integerized function is then transformed back into that of the original function:</p>
          <p>
            <disp-formula>
              <graphic xlink:href="jmir_v22i12e22555_fig17.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
            </disp-formula>
          </p>
          <p>We now describe the detailed process of secure logistic regression. Before training begins, the involved data providers divide their own research data into 10 folds from <italic>Fold1</italic>~<italic>Fold10</italic> for 10-fold cross-validation and then encode the information into a vector. For example, a data set <italic>x</italic> containing 20 samples is divided as follows:</p>
          <p><italic>Fold1</italic> ~ (<italic>x<sub>1</sub></italic>, <italic>x<sub>6</sub></italic>), <italic>Fold2</italic> ~ (<italic>x<sub>2</sub></italic>, <italic>x<sub>17</sub></italic>), <italic>Fold3</italic> ~ (<italic>x<sub>3</sub></italic>, <italic>x<sub>13</sub></italic>), <italic>Fold4</italic> ~ (<italic>x<sub>4</sub></italic>, <italic>x<sub>10</sub></italic>), <italic>Fold5</italic> ~ (<italic>x<sub>5</sub></italic>, <italic>x<sub>20</sub></italic>), <italic>Fold6</italic> ~ (<italic>x<sub>7</sub></italic>, <italic>x<sub>16</sub></italic>), <italic>Fold7</italic> ~ (<italic>x<sub>8</sub></italic>, <italic>x<sub>14</sub></italic>), <italic>Fold8</italic> ~ (<italic>x<sub>9</sub></italic>, <italic>x<sub>11</sub></italic>), <italic>Fold9</italic> ~ (<italic>x<sub>12</sub></italic>, <italic>x<sub>18</sub></italic>), <italic>Fold10</italic> ~ (<italic>x<sub>15</sub></italic>, <italic>x<sub>19</sub></italic>)</p>
          <p>Next, the information is encoded into a vector of values (1, 2, 3, 4, 5, 1, 6, 7, 8, 4, 8, 9, 3, 7, 10, 6, 2, 9, 10, 5). The vector can be viewed as a special column of research data, although this column is not used in the computation of the approximation sigmoid function.</p>
          <p>When all the data providers finish dividing their research data, they send these vectors to the service provider. As these vectors do not contain any sensitive information, they do not need to be further encoded into CRT-batched polynomials and encrypted.</p>
          <p>After all preparations are completed, the model training begins, as shown in <xref ref-type="boxed-text" rid="box1">Textboxes 1</xref>-<xref ref-type="boxed-text" rid="box3">3</xref>. In <xref ref-type="boxed-text" rid="box1">Textbox 1</xref>, we use minibatch gradient descent instead of batch gradient descent because the former converges faster, and we can make full use of CRT batching by simultaneously training 10 models for 10-fold cross-validation, which vastly reduces the time cost of model training. Specifically, for each iteration, the researcher assigns the sets of parameters to the research samples according to the number of iterations and the fold to which these samples belong, which means that in one iteration, a one-to-one correspondence exists between the 10 sets of parameters and the 10 folds of research data. Once the gradient ciphertexts are computed, all data providers will mask them via randomly generated encrypted noises (<xref ref-type="boxed-text" rid="box3">Textbox 3</xref>). The masked gradients are then decrypted, and only the researcher can obtain the plaintext result. As the researcher only knows the sum of noises for each fold, the correct overall gradients are finally obtained to update the model parameters of the researcher without revealing the gradient of any single sample.</p>
          <boxed-text id="box1" position="float">
            <title>Privacy-preserving logistic regression model training.</title>
            <p>Input: <italic>epoch</italic> (# of iterations), <italic>α</italic> (learning rate), <italic>ε</italic> (step tolerance), <italic>c</italic>(<italic>x</italic>)={<italic>c</italic>(<italic>x<sup>1</sup></italic>),...,<italic>c</italic>(<italic>x<sup>d</sup></italic>),<italic>c</italic>(<italic>y</italic>)} (encrypted research data), <italic>x<sup>d+1</sup></italic> (vector describing how data providers divide their research data), <italic>b</italic> (# of samples in one fold), <italic>z</italic> (# of parties), <italic>s1<sub>1</sub></italic> ~ <italic>s1<sub>z</sub></italic> (secret keys), <italic>pk1<sub>co</sub></italic> (combined public key), <italic>β</italic>(1) ~ <italic>β</italic>(10) (model parameters initialized by researcher where each <italic>β</italic>(<italic>i</italic>)=(<italic>β</italic>(<italic>i</italic>)<italic><sup>0</sup></italic>, <italic>β</italic>(<italic>i</italic>)<italic><sup>1</sup></italic>,...,<italic>β</italic>(<italic>i</italic>)<italic><sup>d</sup></italic>))</p>
            <p>Output: <italic>β<sub>new</sub></italic>(1) ~ <italic>β<sub>new</sub></italic>(10) (trained model parameters)</p>
            <p>Researcher does:</p>
            <p>1: For <italic>iter</italic>=1 to <italic>epoch</italic> / 9</p>
            <p>2: <italic>β<sub>old</sub></italic>(1) ~ <italic>β<sub>old</sub></italic>(10) ← <italic>β</italic>(1) ~ <italic>β</italic>(10)</p>
            <p>3: For <italic>cv</italic>=1 to 9</p>
            <p>4: <italic>B</italic> ← empty vector</p>
            <p>5: For-each element <italic>i</italic> in <italic>x<sup>d+1</sup></italic></p>
            <p>6: <italic>B</italic>.push_back(<italic>β</italic>((<italic>i</italic>+<italic>cv</italic>−1) mod 10+1))</p>
            <p>7: End for-each</p>
            <p>8: <italic>B’</italic> ← CRT-batchingEncode(<italic>B</italic>) // <italic>B’</italic>={<italic>B’<sup>0</sup></italic>,…,<italic>B’<sup>d</sup></italic>}</p>
            <p>9: <italic>c</italic>(<italic>B<sup>0</sup></italic>) ~ <italic>c</italic>(<italic>B<sup>d</sup></italic>) ← THE.encrypt(<italic>B’</italic>, <italic>pk1<sub>co</sub></italic>)</p>
            <p>10: Wait for encrypted gradient calculation <italic>c</italic>(<italic>gra<sup>0</sup></italic>) ~ <italic>c</italic>(<italic>gra<sup>d</sup></italic>) // See (<xref ref-type="boxed-text" rid="box2">Textbox 2</xref>) for details</p>
            <p>11: Wait for secure decryption of encrypted gradients <italic>gra</italic>(1) ~ <italic>gra</italic>(10) // See (<xref ref-type="boxed-text" rid="box3">Textbox 3</xref>) for details</p>
            <p>12: <italic>β</italic>(1) ~ <italic>β</italic>(10) −= (<italic>gra</italic>(1) ~ <italic>gra</italic>(10))×<italic>α</italic> ÷ <italic>b</italic></p>
            <p>13: End for</p>
            <p>14: <italic>β<sub>new</sub></italic>(1) ~ <italic>β<sub>new</sub></italic>(10) ← <italic>β</italic>(1) ~ <italic>β</italic>(10)</p>
            <p>15: If (&#124;&#124;<italic>β<sub>new</sub></italic>−<italic>β<sub>old</sub></italic>&#124;&#124; ÷ &#124;&#124;<italic>β<sub>new</sub></italic>&#124;&#124;&#60;<italic>ε</italic>) then</p>
            <p>16: return <italic>β<sub>new</sub></italic>(1) ~ <italic>β<sub>new</sub></italic>(10)</p>
            <p>17: End if</p>
            <p>18:End for</p>
          </boxed-text>
          <boxed-text id="box2" position="float">
            <title>Encrypted gradient calculation.</title>
            <p>Input: <italic>c</italic>(<italic>B<sup>0</sup></italic>) ~ <italic>c</italic>(<italic>B<sup>d</sup></italic>), <italic>c</italic>(<italic>x</italic>) // See details in (<xref ref-type="boxed-text" rid="box1">Textbox 1</xref>)</p>
            <p>Output: <italic>c</italic>(<italic>gra<sup>0</sup></italic>) ~ <italic>c</italic>(<italic>gra<sup>d</sup></italic>) (encrypted gradients)</p>
            <p>Service provider does:</p>
            <p>1: <italic>c</italic>(<italic>x<sup>T</sup>β</italic>) ← <italic>c</italic>(<italic>B<sup>0</sup></italic>)+<italic>c</italic>(<italic>B<sup>1</sup></italic>)×<italic>c</italic>(<italic>x<sup>1</sup></italic>)+...+<italic>c</italic>(<italic>B<sup>d</sup></italic>)×<italic>c</italic>(<italic>x<sup>d</sup></italic>)</p>
            <p>2: <italic>c</italic>(<italic>G</italic>) ← G<sub>3</sub>(<italic>c</italic>(<italic>x<sup>T</sup>β</italic>)) // G<sub>3</sub> is an integerized sigmoid function</p>
            <p>3: <italic>c</italic>(<italic>gra<sup>0</sup></italic>) ~ <italic>c</italic>(<italic>gra<sup>d</sup></italic>) ← [<italic>c</italic>(<italic>G</italic>)−627743311836×<italic>c</italic>(<italic>y</italic>)]×[<italic>c</italic>(<italic>x<sup>0</sup></italic>) ~ <italic>c</italic>(<italic>x<sup>d</sup></italic>)] // Here, <italic>c</italic>(<italic>x<sup>0</sup></italic>)=1</p>
          </boxed-text>
          <boxed-text id="box3" position="float">
            <title>Secure decryption of encrypted gradients.</title>
            <p>Input: <italic>x<sup>d+1</sup></italic>, <italic>cv</italic>, <italic>c</italic>(<italic>gra<sup>0</sup></italic>) ~ <italic>c</italic>(<italic>gra<sup>d</sup></italic>), <italic>s1<sub>1</sub></italic> ~ <italic>s1<sub>z</sub></italic>, <italic>pk1<sub>co</sub></italic> // See details in (<xref ref-type="boxed-text" rid="box1">Textbox 1</xref>)</p>
            <p>Output: <italic>gra</italic>(1) ~ <italic>gra</italic>(10) (unencrypted gradients)</p>
            <p>All data providers do:</p>
            <p>1: <italic>e<sup>0</sup></italic> ~ <italic>e<sup>d</sup></italic> ← random noise vectors whose size equals <italic>x<sup>d+1</sup></italic></p>
            <p>2: <italic>E</italic>(1) ~ <italic>E</italic>(10) ← zero vectors whose size equals <italic>d</italic>+1</p>
            <p>3: For <italic>i</italic>=1 to size(<italic>x<sup>d+1</sup></italic>)</p>
            <p>4: For <italic>j</italic>=1 to <italic>d</italic>+1</p>
            <p>5: <italic>E</italic>((<italic>x<sup>d+1</sup></italic>(<italic>i</italic>)+<italic>cv</italic>−1) mod 10+1)(<italic>j</italic>)+= <italic>e<sup>j−1</sup></italic>(<italic>i</italic>)</p>
            <p>6: End for</p>
            <p>7: End for // <italic>E</italic>(1) ~ <italic>E</italic>(10) are sent to the researcher</p>
            <p>8: <italic>e’</italic> ← CRT-batchingEncode(<italic>e<sup>0</sup></italic> ~ <italic>e<sup>d</sup></italic>)</p>
            <p>9: <italic>c</italic>(<italic>e<sup>0</sup></italic>) ~ <italic>c</italic>(<italic>e<sup>d</sup></italic>) ← THE.encrypt(<italic>e’</italic>, <italic>pk1<sub>co</sub></italic>) // <italic>c</italic>(<italic>e<sup>0</sup></italic>) ~ <italic>c</italic>(<italic>e<sup>d</sup></italic>) are sent to the service provider</p>
            <p>Service provider does:</p>
            <p>10:<italic>c’</italic>(<italic>gra<sup>0</sup></italic>) ~ <italic>c’</italic>(<italic>gra<sup>d</sup></italic>) ← <italic>c</italic>(<italic>gra<sup>0</sup></italic>) ~ <italic>c</italic>(<italic>gra<sup>d</sup></italic>)+<italic>c</italic>(<italic>e<sup>0</sup></italic>) ~ <italic>c</italic>(<italic>e<sup>d</sup></italic>)</p>
            <p>All parties do:</p>
            <p>11:<italic>gra’<sup>0</sup></italic> ~ <italic>gra’<sup>d</sup></italic> ← THE.decrypt(<italic>c’</italic>(<italic>gra<sup>0</sup></italic>) ~ <italic>c’</italic>(<italic>gra<sup>d</sup></italic>), <italic>s1<sub>1</sub></italic> ~ <italic>s1<sub>z</sub></italic>) // To ensure only the researcher obtains the plaintext result, data providers’ and researcher’s partial decryption shares are added at the service provider and the researcher, respectively.</p>
            <p>Researcher does:</p>
            <p>12:<italic>gra</italic>(1) ~ <italic>gra</italic>(10) ← zero vectors whose size equals <italic>d</italic>+1</p>
            <p>13:<italic>gra’’<sup>0</sup></italic> ~ <italic>gra’’<sup>d</sup></italic> ← CRT-batchingDecode(<italic>gra’<sup>0</sup></italic> ~ <italic>gra’<sup>d</sup></italic>) // Decoding result is vectors whose size equals <italic>x<sup>d+1</sup></italic>.</p>
            <p>14:For <italic>i</italic>=1 to size(<italic>x<sup>d+1</sup></italic>)</p>
            <p>15: For <italic>j</italic>=1 to <italic>d</italic>+1</p>
            <p>16: <italic>gra</italic>((<italic>x<sup>d+1</sup></italic>(<italic>i</italic>)+<italic>cv</italic>−1) mod 10+1)(<italic>j</italic>)+= <italic>gra’’<sup>j−1</sup></italic>(<italic>i</italic>)</p>
            <p>17: End for</p>
            <p>18:End for</p>
            <p>19:<italic>gra</italic>(1) ~ <italic>gra</italic>(10) −= <italic>E</italic>(1) ~ <italic>E</italic>(10)</p>
          </boxed-text>
          <p>Once the model training is completed, all involved data providers encode their own research data for each fold into CRT-batched polynomials whose slots are randomly chosen to contain samples. In the meantime, the data providers also generate vectors containing information about whether a certain slot contains a sample and encode them into CRT-batched polynomials. For instance, for a CRT-batched polynomial containing samples in slots (1, 6, 8), the vector should be (1, 0, 0, 0, 0, 1, 0, 1). These polynomials are then encrypted by <italic>pk1<sub>co</sub></italic> and sent to the service provider.</p>
          <p>When all the aforementioned preparations are completed, the model evaluation starts, as shown in <xref ref-type="boxed-text" rid="box4">Textboxes 4</xref>-<xref ref-type="boxed-text" rid="box6">6</xref>. In <xref ref-type="boxed-text" rid="box5">Textbox 5</xref>, lines 3-5, all data providers mask the encrypted predictive values. Here, the noise generation should meet 2 criteria, whereas the noise generation in <xref ref-type="boxed-text" rid="box3">Textbox 3</xref> line 1 has no special limitations as long as the error is random and sufficiently large to mask the true values. First, in the empty slots, we sample noise from a uniform distribution whose lower and upper bounds are the minimum and maximum values, respectively, of the integerized approximation sigmoid function G<sub>3</sub>. Second, in the slots containing samples, we sample noise from a uniform distribution (−1569358279, 1569358279) whose corresponding values are (−0.005, 0.005) in the scaled down plaintext. In <xref ref-type="boxed-text" rid="box6">Textbox 6</xref>, lines 1-3, all data providers perform another masking; this time, the noise generation is exactly the same as in <xref ref-type="boxed-text" rid="box3">Textbox 3</xref> line 1.</p>
          <boxed-text id="box4" position="float">
            <title>Model evaluation.</title>
            <p>Input: <italic>c</italic>(1)(<italic>x</italic>, <italic>y</italic>) ~ <italic>c</italic>(10)(<italic>x</italic>, <italic>y</italic>) (10 encrypted folds of research data), <italic>c</italic>(1)(<italic>x<sup>d+1</sup></italic>) ~ <italic>c</italic>(10)(<italic>x<sup>d+1</sup></italic>) (encrypted vectors indicating whether a certain slot contains a sample), <italic>β</italic>(1) ~ <italic>β</italic>(10) (trained sets of parameters), <italic>s1<sub>1</sub></italic> ~ <italic>s1<sub>z</sub></italic> (secret keys), <italic>pk1<sub>co</sub></italic> (combined public key)</p>
            <p>Output: <italic>TP</italic>, <italic>FP</italic>, <italic>TN</italic>, <italic>FN</italic> (number of true positives, false positives, true negatives, and false negatives, respectively, under different predictive value thresholds)</p>
            <p>Researcher does:</p>
            <p>1: For <italic>FD</italic>=1 to 10</p>
            <p>2: <italic>c</italic>(<italic>β<sup>0</sup></italic>) ~ <italic>c</italic>(<italic>β<sup>d</sup></italic>) ← THE.encrypt(<italic>β</italic>(<italic>FD</italic>), <italic>pk1<sub>co</sub></italic>) // <italic>c</italic>(<italic>β<sup>0</sup></italic>) ~ <italic>c</italic>(<italic>β<sup>d</sup></italic>) are sent to the service provider</p>
            <p>3: Wait for masked predictive values <italic>σ</italic> // See (<xref ref-type="boxed-text" rid="box5">Textbox 5</xref>) for details</p>
            <p>4: For <italic>V</italic>=min(<italic>G<sub>3</sub></italic>) : (max(<italic>G<sub>3</sub></italic>)–min(<italic>G<sub>3</sub></italic>))/100 : max(<italic>G<sub>3</sub></italic>)</p>
            <p>5: <italic>X</italic> ← empty vector</p>
            <p>6: For-each predictive value <italic>σ<sub>i</sub></italic> in <italic>σ</italic></p>
            <p>7: <italic>X</italic>.push_back(if(<italic>σ<sub>i</sub></italic>≥<italic>V</italic>))</p>
            <p>8: End for-each</p>
            <p>9: <italic>X’</italic> ← CRT-batchingEncode(<italic>X</italic>)</p>
            <p>10: <italic>c</italic>(<italic>TP</italic>), <italic>c</italic>(<italic>FP</italic>), <italic>c</italic>(<italic>TN</italic>), <italic>c</italic>(<italic>FN</italic>) ← <italic>c</italic>(<italic>FD</italic>)(<italic>y</italic>)×<italic>X’</italic>×<italic>c</italic>(<italic>FD</italic>)(<italic>x<sup>d+1</sup></italic>), (1−<italic>c</italic>(<italic>FD</italic>)(<italic>y</italic>))×<italic>X’</italic>×<italic>c</italic>(<italic>FD</italic>)(<italic>x<sup>d+1</sup></italic>), (1−<italic>c</italic>(<italic>FD</italic>)(<italic>y</italic>))×(1−<italic>X’</italic>)×<italic>c</italic>(<italic>FD</italic>)(<italic>x<sup>d+1</sup></italic>), <italic>c</italic>(<italic>FD</italic>)(<italic>y</italic>)×(1−<italic>X’</italic>)×<italic>c</italic>(<italic>FD</italic>)(<italic>x<sup>d+1</sup></italic>) // These 4 ciphertexts are sent to the service provider</p>
            <p>11: Wait for masked model evaluation results <italic>TP’</italic>, <italic>FP’</italic>, <italic>TN’</italic>, <italic>FN’</italic> // See (<xref ref-type="boxed-text" rid="box6">Textbox 6</xref>) for details</p>
            <p>12: <italic>TP’’</italic>, <italic>FP’’</italic>, <italic>TN’’</italic>, <italic>FN’’</italic> ← CRT-batchingDecode(<italic>TP’</italic>, <italic>FP’</italic>, <italic>TN’</italic>, <italic>FN’</italic>)</p>
            <p>13: <italic>TP</italic>, <italic>FP</italic>, <italic>TN</italic>, <italic>FN</italic> ← <italic>TP’’</italic>−sum(<italic>e<sub>TP</sub></italic>), <italic>FP’’</italic>−sum(<italic>e<sub>FP</sub></italic>), <italic>TN’’</italic>−sum(<italic>e<sub>TN</sub></italic>), <italic>FN’’</italic>−sum(<italic>e<sub>FN</sub></italic>)</p>
            <p>14: output <italic>TP</italic>, <italic>FP</italic>, <italic>TN</italic>, <italic>FN</italic> // under fold <italic>FD</italic> and predictive value threshold <italic>V</italic></p>
            <p>15: End for</p>
            <p>16:End for</p>
          </boxed-text>
          <boxed-text id="box5" position="float">
            <title>Calculation of masked predictive values.</title>
            <p>Input: <italic>c</italic>(1)(<italic>x</italic>) ~ <italic>c</italic>(10)(<italic>x</italic>), <italic>x<sup>d+1</sup></italic>(1) ~ <italic>x<sup>d+1</sup></italic>(10), <italic>c</italic>(<italic>β<sup>0</sup></italic>) ~ <italic>c</italic>(<italic>β<sup>d</sup></italic>), <italic>FD</italic>, <italic>pk1<sub>co</sub></italic>, <italic>s1<sub>1</sub></italic> ~ <italic>s1<sub>z</sub></italic> // See details in (<xref ref-type="boxed-text" rid="box4">Textbox 4</xref>)</p>
            <p>Output: <italic>σ</italic> (masked predictive values)</p>
            <p>Service provider does:</p>
            <p>1: <italic>c</italic>(<italic>x<sup>T</sup>β</italic>) ← <italic>c</italic>(<italic>β<sup>0</sup></italic>)+<italic>c</italic>(<italic>β<sup>1</sup></italic>)×<italic>c</italic>(<italic>FD</italic>)(<italic>x<sup>1</sup></italic>)+...+<italic>c</italic>(<italic>β<sup>d</sup></italic>)×<italic>c</italic>(<italic>FD</italic>)(<italic>x<sup>d</sup></italic>)</p>
            <p>2: <italic>c</italic>(<italic>G</italic>) ← G<sub>3</sub>(<italic>c</italic>(<italic>x<sup>T</sup>β</italic>)) // G<sub>3</sub> is an integerized sigmoid function</p>
            <p>All data providers do:</p>
            <p>3: <italic>e</italic> ← random noise vectors whose size equals <italic>x<sup>d+1</sup></italic>(<italic>FD</italic>)</p>
            <p>4: <italic>e’</italic> ← CRT-batchingEncode(<italic>e</italic>)</p>
            <p>5: <italic>c</italic>(<italic>e’</italic>) ← THE.encrypt(<italic>e’</italic>, <italic>pk1<sub>co</sub></italic>) // <italic>c</italic>(<italic>e’</italic>) are sent to the service provider</p>
            <p>Service provider does:</p>
            <p>6: <italic>c’</italic>(<italic>G</italic>) ← <italic>c</italic>(<italic>G</italic>)+<italic>c</italic>(<italic>e’</italic>)</p>
            <p>All parties do:</p>
            <p>7: <italic>σ</italic> ← CRT-batchingDecode(THE.decrypt(<italic>c’</italic>(<italic>G</italic>), <italic>s1<sub>1</sub></italic> ~ <italic>s1<sub>z</sub></italic>)) // The same as in (<xref ref-type="boxed-text" rid="box3">Textbox 3</xref>), only the researcher obtains the plaintext result</p>
          </boxed-text>
          <boxed-text id="box6" position="float">
            <title>Calculation of masked model evaluation results.</title>
            <p>Input: <italic>c</italic>(1)(<italic>x<sup>d+1</sup></italic>) ~ <italic>c</italic>(10)(<italic>x<sup>d+1</sup></italic>), <italic>FD</italic>, <italic>pk1<sub>co</sub></italic>, <italic>c</italic>(<italic>TP</italic>), <italic>c</italic>(<italic>FP</italic>), <italic>c</italic>(<italic>TN</italic>), <italic>c</italic>(<italic>FN</italic>), <italic>s1<sub>1</sub></italic> ~ <italic>s1<sub>z</sub></italic> // See details in (<xref ref-type="boxed-text" rid="box4">Textbox 4</xref>)</p>
            <p>Output: <italic>TP’</italic>, <italic>FP’</italic>, <italic>TN’</italic>, <italic>FN’</italic> (masked model evaluation results)</p>
            <p>All data providers do:</p>
            <p>1: <italic>e<sub>TP</sub></italic>, <italic>e<sub>FP</sub></italic>, <italic>e<sub>TN</sub></italic>, <italic>e<sub>FN</sub></italic> ← random noise vectors whose size equals <italic>c</italic>(<italic>FD</italic>)(<italic>x<sup>d+1</sup></italic>) // The sums of noises sum(<italic>e<sub>TP</sub></italic>), sum(<italic>e<sub>FP</sub></italic>), sum(<italic>e<sub>TN</sub></italic>), sum(<italic>e<sub>FN</sub></italic>) are sent to the researcher</p>
            <p>2: <italic>e’<sub>TP</sub></italic>, <italic>e’<sub>FP</sub></italic>, <italic>e’<sub>TN</sub></italic>, <italic>e’<sub>FN</sub></italic> ← CRT-batchingEncode(<italic>e<sub>TP</sub></italic>, <italic>e<sub>FP</sub></italic>, <italic>e<sub>TN</sub></italic>, <italic>e<sub>FN</sub></italic>)</p>
            <p>3: <italic>c</italic>(<italic>e’<sub>TP</sub></italic>), <italic>c</italic>(<italic>e’<sub>FP</sub></italic>), <italic>c</italic>(<italic>e’<sub>TN</sub></italic>), <italic>c</italic>(<italic>e’<sub>FN</sub></italic>) ← THE.encrypt((<italic>e’<sub>TP</sub></italic>, <italic>e’<sub>FP</sub></italic>, <italic>e’<sub>TN</sub></italic>, <italic>e’<sub>FN</sub></italic>), <italic>pk1<sub>co</sub></italic>) // These encrypted noises are sent to the service provider</p>
            <p>Service provider does:</p>
            <p>4: <italic>c’</italic>(<italic>TP</italic>), <italic>c’</italic>(<italic>FP</italic>), <italic>c’</italic>(<italic>TN</italic>), <italic>c’</italic>(<italic>FN</italic>) ← <italic>c</italic>(<italic>TP</italic>)+<italic>c</italic>(<italic>e’<sub>TP</sub></italic>), <italic>c</italic>(<italic>FP</italic>)+<italic>c</italic>(<italic>e’<sub>FP</sub></italic>), <italic>c</italic>(<italic>TN</italic>)+<italic>c</italic>(<italic>e’<sub>TN</sub></italic>), <italic>c</italic>(<italic>FN</italic>)+<italic>c</italic>(<italic>e’<sub>FN</sub></italic>)</p>
            <p>All parties do:</p>
            <p>5: <italic>TP’</italic>, <italic>FP’</italic>, <italic>TN’</italic>, <italic>FN’</italic> ← THE.decrypt((<italic>c’</italic>(<italic>TP</italic>), <italic>c’</italic>(<italic>FP</italic>), <italic>c’</italic>(<italic>TN</italic>), <italic>c’</italic>(<italic>FN</italic>)), <italic>s1<sub>1</sub></italic> ~ <italic>s1<sub>z</sub></italic>) // The same as in (<xref ref-type="boxed-text" rid="box3">Textbox 3</xref>), only the researcher obtains the plaintext result</p>
          </boxed-text>
          <p>Once the model evaluation ends, the researcher obtains the number of true positives (TPs), false positives (FPs), true negatives (TNs), and false negatives (FNs) for the 10 folds and different predictive value thresholds, which should be sufficient to evaluate the trained model via 10-fold cross-validation.</p>
        </sec>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Overview</title>
        <p>In this section, we consider the following aspects to assess the performance of our proposed multicenter secure logistic regression protocol: (1) security analysis: security of sensitive research data and learned model; (2) accuracy loss: the loss in accuracy during the model training and evaluation with respect to the nonsecure method with real medical data; (3) model training and evaluation time: the time needed to perform 10-fold cross-validation with real medical data; and (4) scalability: how the model training and evaluation time increases as the size of the data increases in the synthetic data set.</p>
        <p>The biomedical data sets used for the experiments are shown in <xref ref-type="table" rid="table1">Table 1</xref> [<xref ref-type="bibr" rid="ref33">33</xref>,<xref ref-type="bibr" rid="ref34">34</xref>]. For the breast cancer data set, we eliminate missing samples, use all the attributes except breast-quad, and assume that the data set is provided by 1 data provider. For the surveillance, epidemiology, and end results colorectal cancer data set, we choose a portion of the samples and use 5-year survival status as the label. Moreover, all the attributes, except the registry, are used, and we assume that the data set is provided by 3 different data providers. More details about these 2 data sets are provided in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>. We use 10-fold cross-validation, which partitions the data sets into 10 folds of approximately equal size by stratified sampling to ensure that the positive/negative ratio of each fold is approximately equal. Each time, 9 folds are used as the training set and the remaining fold is used as the test set. In addition, we assume that during model training, all data ciphertexts share the same data division vector so that the gradient ciphertexts can be summed to reduce the size of transferred data in <xref ref-type="boxed-text" rid="box3">Textbox 3</xref> line 11.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Description of the data sets.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="400"/>
            <col width="300"/>
            <col width="300"/>
            <thead>
              <tr valign="top">
                <td>Data sets</td>
                <td>SEER<sup>a</sup> CRC<sup>b</sup> data [<xref ref-type="bibr" rid="ref31">31</xref>]</td>
                <td>UCI<sup>c</sup> breast cancer [<xref ref-type="bibr" rid="ref32">32</xref>]</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Samples, n</td>
                <td>49152</td>
                <td>277</td>
              </tr>
              <tr valign="top">
                <td>Attributes, n</td>
                <td>10</td>
                <td>9</td>
              </tr>
              <tr valign="top">
                <td>Size of ciphertexts, MB</td>
                <td>60.0</td>
                <td>18.0</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>SEER: surveillance, epidemiology, and end results.</p>
            </fn>
            <fn id="table1fn2">
              <p><sup>b</sup>CRC: colorectal cancer.</p>
            </fn>
            <fn id="table1fn3">
              <p><sup>c</sup>UCI: University of California, Irvine.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <p>To set the homomorphic encryption parameters, we select the following parameters to guarantee sufficient security, as shown in <xref ref-type="table" rid="table2">Table 2</xref>. Our values for the polynomial modulus, coefficient modulus, and security level match the most recent homomorphic encryption security standards proposed by the Homomorphic-Encryption.org group [<xref ref-type="bibr" rid="ref35">35</xref>]. The degree of polynomial modulus <italic>n</italic> is a power of 2, whereas the coefficient moduli in <italic>param1</italic> and <italic>param2</italic> are products of 8 and 5 distinct primes, respectively, where every prime <italic>P</italic> is at most 60 bits long and satisfies <italic>P</italic>=1 (mod 2<italic>n</italic>), which makes the NTT accessible. The plaintext modulus in <italic>param1</italic> also satisfies <italic>t1</italic>=1 (mod 2<italic>n</italic>), allowing for the implementation of CRT batching.</p>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Select parameters for Brakerski/Fan-Vercauteren homomorphic encryption.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="200"/>
            <col width="400"/>
            <col width="400"/>
            <thead>
              <tr valign="top">
                <td>Parameters</td>
                <td>
                  <italic>param1</italic>
                </td>
                <td>
                  <italic>param2</italic>
                </td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Polynomial modulus</td>
                <td>16,384</td>
                <td>16,384</td>
              </tr>
              <tr valign="top">
                <td>Coefficient modulus</td>
                <td>438-bit integer</td>
                <td>300-bit integer</td>
              </tr>
              <tr valign="top">
                <td>Plaintext modulus</td>
                <td>1125899904679937</td>
                <td>Coefficient modulus of <italic>param1</italic></td>
              </tr>
              <tr valign="top">
                <td>Key distribution</td>
                <td>Uniform distribution {−1, 0, 1}</td>
                <td>Uniform distribution {−1, 0, 1}</td>
              </tr>
              <tr valign="top">
                <td>Error distribution</td>
                <td>Discrete Gaussian distribution with σ=3.2</td>
                <td>Discrete Gaussian distribution with σ=3.2</td>
              </tr>
              <tr valign="top">
                <td>Security level</td>
                <td>128-bit</td>
                <td>192-bit</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <p>To simulate a real-world scenario, we place the data providers, the researcher, and the service provider on different machines. For the data providers and the researcher, we use PCs with a 2.2-GHz Intel Core i7-8750H processor and 16.0 GB RAM (Windows 10 Enterprise). For the service provider, we use a server with a 2.3 GHz Intel Xeon Gold 6140 processor and 128.0 GB RAM (Linux 3.10.0). The secure logistic regression protocol is implemented in C++ using Microsoft SEAL v3.0 and is publicly available at GitHub [<xref ref-type="bibr" rid="ref36">36</xref>], where we made some modifications to support the threshold-variant BFV scheme [<xref ref-type="bibr" rid="ref37">37</xref>]. All PCs have an internet connection of 100 Mbps bandwidth.</p>
      </sec>
      <sec>
        <title>Security Analysis</title>
        <p>In our protocol, security means that corrupted parties will not be able to obtain sensitive data or learned models from honest parties. Here, we show the security of our protocol from the following 2 aspects: (1) honest parties’ secret keys will not be obtained by the corrupted parties so that no ciphertext will be decrypted illegally, including the encrypted data, model parameters, and any other intermediate results and (2) if the researcher is an adversary, he or she cannot obtain any meaningful information about honest parties’ individuals from the unencrypted intermediate results.</p>
        <sec>
          <title>Security of Secret Keys</title>
          <p>To demonstrate the security of the secret keys, we use the simulation paradigm described in the study by Goldreich [<xref ref-type="bibr" rid="ref38">38</xref>], that is, for all adversaries, there exists a simulator program <italic>S</italic> that, when provided only with the adversaries’ input and output, can simulate the adversaries’ view in the protocol, and the simulated view is computationally indistinguishable from the real view. Suppose there are <italic>z</italic> parties. Let <italic>A</italic> denote the adversaries, defined as a subset of at most <italic>z</italic> −1 corrupted parties, and <italic>H</italic> denote the honest parties.</p>
        </sec>
        <sec>
          <title>Combined Public Key Generation</title>
          <p>In the generation of the combined public key, <italic>S</italic> can simulate the adversaries’ view of public key shares (<italic>pk<sub>1</sub></italic>, <italic>pk<sub>2</sub></italic>,..., <italic>pk<sub>z</sub></italic>) by randomizing these shares under 2 constraints: (1) the simulated shares must sum to <italic>pk<sub>co</sub></italic>(0) and (2) the adversary shares must be equal to the real shares. <italic>S</italic> can compute this sharing as follows:</p>
          <p>
            <disp-formula>
              <graphic xlink:href="jmir_v22i12e22555_fig18.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
            </disp-formula>
          </p>
          <p>When &#124;<italic>H</italic>&#124;&#62;1, there is no efficient algorithm that can distinguish between the simulated and real shares in <italic>H</italic> because of the decision-RLWE problem. When &#124;<italic>H</italic>&#124;=1, <italic>S</italic> computes the real shares of the honest party. However, because both <italic>s<sub>i</sub></italic> and <italic>e<sub>i</sub></italic> are private inputs from party <italic>p<sub>i</sub></italic>, the adversaries cannot find the secret key of the honest party because of the search-RLWE problem.</p>
        </sec>
        <sec>
          <title>Decryption</title>
          <p>Given the ciphertext <italic>c</italic>=(<italic>c</italic>(0), <italic>c</italic>(1)), during the decryption process, <italic>S</italic> can simulate the adversaries’ view of the decryption shares (<italic>μ<sub>1</sub></italic>, <italic>μ<sub>2</sub></italic>,..., <italic>μ<sub>z</sub></italic>) by randomizing these shares under 2 constraints: (1) the simulated shares must sum to <italic>μ</italic>–<italic>c</italic>(0) and (2) the adversary shares must be equal to the real shares:</p>
          <p>
            <disp-formula>
              <graphic xlink:href="jmir_v22i12e22555_fig19.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
            </disp-formula>
          </p>
          <p>When considering the distribution of the simulated and real views alone, the RLWE assumption is sufficient to ensure the security of secret keys of <italic>H</italic> if the researcher is uncorrupted. However, if the researcher becomes an adversary, they can extract the noise of <italic>c</italic> as follows:</p>
          <p>
            <disp-formula>
              <graphic xlink:href="jmir_v22i12e22555_fig20.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
            </disp-formula>
          </p>
          <p>where <italic>e’</italic> is the noise of <italic>c</italic>, which should be unknown to the researcher; otherwise, the RLWE assumption will be broken and the secret keys of the honest parties may be exposed to the researcher. Let <italic>var<sup>2</sup><sub>c</sub></italic> denote the variance of a centered Gaussian distribution that <italic>e</italic> follows and <italic>var<sup>2</sup><sub>smg</sub></italic> denote the variance of <italic>D<sub>smg</sub></italic>, which is used to generate <italic>e<sub>i</sub></italic>. Thus, as long as the ratio <italic>var<sup>2</sup><sub>c</sub></italic>/<italic>var<sup>2</sup><sub>smg</sub></italic> is negligible, the following 2 distributions are statistically indistinguishable, which means that <italic>e’</italic> is unknown to the researcher and that the researcher cannot obtain <italic>H</italic>’s secret keys:</p>
          <p>
            <disp-formula>
              <graphic xlink:href="jmir_v22i12e22555_fig21.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
            </disp-formula>
          </p>
        </sec>
        <sec>
          <title>Unencrypted Intermediate Results</title>
          <p>First, during model training, all data providers apply one-time-use noise to mask the encrypted gradient before decryption, meaning that even if only one data provider is honest, it will not lead to the disclosure of the gradients of the individuals.</p>
          <p>Second, during model evaluation, the researcher will inevitably obtain CRT-batched polynomials containing the predictive values for each sample. Given a masked predictive value <italic>σ<sub>i</sub></italic> ∈ (<italic>V<sub>j</sub></italic>, <italic>V<sub>j+1</sub></italic>), the probability of recovering the research data is computed as follows:</p>
          <p>
            <disp-formula>
              <graphic xlink:href="jmir_v22i12e22555_fig22.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
            </disp-formula>
          </p>
          <p>Here, <italic>N<sub>j</sub></italic> is the number of samples whose predictive value belongs to (<italic>V<sub>j</sub></italic>, <italic>V<sub>j+1</sub></italic>), <italic>N<sub>ej</sub></italic> is the number of empty slots whose value belongs to (<italic>V<sub>j</sub></italic>, <italic>V<sub>j+1</sub></italic>), and <italic>N<sub>pi</sub></italic> is the number of all possible combinations of feature values whose predictive value belongs to (<italic>σ<sub>i</sub></italic>−1569358279, <italic>σ<sub>i</sub></italic>+1569358279). Therefore, as long as either of these 2 terms is sufficiently small, it is impossible for the researcher to recover the feature values.</p>
          <p>Furthermore, because the encrypted (TP, FP, TN, and FN) information of samples under different predictive value thresholds is also masked by all data providers before being sent to the researcher, the researcher cannot obtain the label of any specific sample.</p>
        </sec>
      </sec>
      <sec>
        <title>Accuracy Loss</title>
        <p>In <xref ref-type="table" rid="table3">Table 3</xref>, we demonstrate the accuracy of our protocol by comparing the area under the curve between the nonsecure logistic regression and our secure logistic regression, where the former uses the standard sigmoid function and both have the same hyperparameters (learning rate <italic>α</italic>=.1, 45 iterations). Compared with that of the nonsecure protocol, a relatively small loss of accuracy was observed in our protocol, which was not statistically significant (the smallest <italic>P</italic>=.09). The average receiver operating characteristic curves from the 10-fold cross-validation are plotted in <xref rid="figure3" ref-type="fig">Figure 3</xref>.</p>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Accuracy comparison between nonsecure and proposed secure logistic regressions.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="500"/>
            <col width="250"/>
            <col width="250"/>
            <thead>
              <tr valign="top">
                <td>Data sets</td>
                <td>SEER<sup>a</sup> CRC<sup>b</sup> data</td>
                <td>Breast cancer</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>AUC<sup>c</sup> (nonsecure)</td>
                <td>0.703 (0.008)</td>
                <td>0.728 (0.156)</td>
              </tr>
              <tr valign="top">
                <td>AUC (our protocol)</td>
                <td>0.696 (0.008)</td>
                <td>0.717 (0.164)</td>
              </tr>
              <tr valign="top">
                <td><italic>P</italic> value (AUC)</td>
                <td>.09</td>
                <td>.88</td>
              </tr>
              <tr valign="top">
                <td>Accuracy (nonsecure)</td>
                <td>0.620 (0.013)</td>
                <td>0.664 (0.149)</td>
              </tr>
              <tr valign="top">
                <td>Accuracy (our protocol)</td>
                <td>0.612 (0.013)</td>
                <td>0.632 (0.155)</td>
              </tr>
              <tr valign="top">
                <td><italic>P</italic> value (accuracy)</td>
                <td>.18</td>
                <td>.64</td>
              </tr>
              <tr valign="top">
                <td><italic>F</italic><sub>1</sub><sup>d</sup> (nonsecure)</td>
                <td>0.654 (0.012)</td>
                <td>0.508 (0.198)</td>
              </tr>
              <tr valign="top">
                <td><italic>F</italic><sub>1</sub> (our protocol)</td>
                <td>0.649 (0.012)</td>
                <td>0.505 (0.240)</td>
              </tr>
              <tr valign="top">
                <td><italic>P</italic> value (<italic>F</italic><sub>1</sub>)</td>
                <td>.42</td>
                <td>.97</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table3fn1">
              <p><sup>a</sup>SEER: surveillance, epidemiology, and end results.</p>
            </fn>
            <fn id="table3fn2">
              <p><sup>b</sup>CRC: colorectal cancer.</p>
            </fn>
            <fn id="table3fn3">
              <p><sup>c</sup>AUC: area under the curve.</p>
            </fn>
            <fn id="table3fn4">
              <p><sup>d</sup><italic>F</italic><sub>1</sub>: the harmonic mean of the precision and recall.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>Average receiver operating characteristic curves of nonsecure and proposed secure logistic regressions. CRC: colorectal cancer; ROC: receiver operating characteristic; SEER: surveillance, epidemiology, and end results; UCI: University of California, Irvine.</p>
          </caption>
          <graphic xlink:href="jmir_v22i12e22555_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>Furthermore, in <xref ref-type="table" rid="table4">Table 4</xref>, we test the relationships between the learning rate and the convergence of the nonsecure and secure logistic regressions. Although our protocol’s model training will be fully spoiled because of the limited valid input interval for the approximation sigmoid function when the learning rate becomes too large, our protocol has a slightly broader range of learning rate selection than the nonsecure protocol.</p>
        <table-wrap position="float" id="table4">
          <label>Table 4</label>
          <caption>
            <p>&#124;&#124;βnew–βold&#124;&#124; ÷ &#124;&#124;βnew&#124;&#124; after 99 iterations (surveillance, epidemiology, and end results colorectal cancer data).</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="200"/>
            <col width="200"/>
            <col width="200"/>
            <col width="200"/>
            <col width="200"/>
            <thead>
              <tr valign="top">
                <td>Learning rate</td>
                <td>0.1</td>
                <td>0.2</td>
                <td>0.3</td>
                <td>0.4</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Nonsecure</td>
                <td>0.056</td>
                <td>0.046</td>
                <td>0.302</td>
                <td>0.347</td>
              </tr>
              <tr valign="top">
                <td>Our protocol</td>
                <td>0.061</td>
                <td>0.052</td>
                <td>0.047</td>
                <td>—<sup>a</sup></td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table4fn1">
              <p><sup>a</sup>Failed to converge.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Model Training and Evaluation Time</title>
        <p>We show the time consumption of the 10-fold cross-validation for the 2 different data sets in <xref ref-type="table" rid="table5">Table 5</xref>.</p>
        <p>Here, we compare our protocol with the SecureLR protocol by Jiang et al [<xref ref-type="bibr" rid="ref19">19</xref>], which is also optimized with NTT and CRT batching but evaluated on only 1 PC. As shown in their experiments, SecureLR can train only 1 model at a time and requires 44.9 seconds per iteration over a data set with a ciphertext size of 5.0 MB. In comparison, our protocol can train 10 models simultaneously and perform each iteration much faster (on a data set with a ciphertext size of 60.0 MB in less than 10 seconds per iteration). Moreover, our protocol supports secure model evaluation with reasonable time consumption.</p>
        <table-wrap position="float" id="table5">
          <label>Table 5</label>
          <caption>
            <p>Time consumption of the proposed protocol.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="200"/>
            <col width="200"/>
            <col width="200"/>
            <col width="200"/>
            <col width="200"/>
            <thead>
              <tr valign="top">
                <td>Data sets</td>
                <td>Iterations, n</td>
                <td>Training time</td>
                <td>Time per iteration (seconds)</td>
                <td>Evaluation time</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>SEER<sup>a</sup> CRC<sup>b</sup> data</td>
                <td>45</td>
                <td>7 min 29 seconds</td>
                <td>9.98</td>
                <td>20 min 27 seconds</td>
              </tr>
              <tr valign="top">
                <td>UCI<sup>c</sup> breast cancer</td>
                <td>45</td>
                <td>4 min 24 seconds</td>
                <td>5.87</td>
                <td>14 min 28 seconds</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table5fn1">
              <p><sup>a</sup>SEER: surveillance, epidemiology, and end results.</p>
            </fn>
            <fn id="table5fn2">
              <p><sup>b</sup>CRC: colorectal cancer.</p>
            </fn>
            <fn id="table5fn3">
              <p><sup>c</sup>UCI: University of California, Irvine.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Scalability Evaluation</title>
        <p>To test our protocol’s scalability, we use a synthetic data set with different numbers of data providers and features, as shown in <xref ref-type="table" rid="table6">Tables 6</xref> and <xref ref-type="table" rid="table7">7</xref>. Given a certain number of features <italic>d</italic>, for the sake of simplicity, we suppose that every data provider encrypts (<italic>d</italic>+1) polynomials. As the number of data providers increases, the computation times of both the model training and evaluation increase proportionally, whereas there is no increase in the transfer time of the model training because the size of the transferred data (encrypted parameters and gradients) is only related to the number of features. Similarly, because there is no relationship between the number of data providers and the transfer of the encrypted (TP, FP, TN, and FN), the transfer time of the model evaluation increases very little. As the number of features increases, the computation and transfer times of the model training increase proportionally, whereas the computation and transfer times of the model evaluation increase only slightly because the majority of the model evaluation involves the computation of (TP, FP, TN, and FN) information under different predictive value thresholds, which is not related to the number of features.</p>
        <table-wrap position="float" id="table6">
          <label>Table 6</label>
          <caption>
            <p>Scalability of the proposed protocol for different numbers of data providers (9 features).</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="130"/>
            <col width="160"/>
            <col width="130"/>
            <col width="140"/>
            <col width="140"/>
            <col width="150"/>
            <col width="150"/>
            <thead>
              <tr valign="top">
                <td>Data providers, n</td>
                <td>Size of ciphertexts, MB</td>
                <td>Iterations, n</td>
                <td>Training time (computation)</td>
                <td>Training time (transfer)</td>
                <td>Evaluation time (computation)</td>
                <td>Evaluation time (transfer)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>3</td>
                <td>60.0</td>
                <td>45</td>
                <td>4 min 16 seconds</td>
                <td>3 min 13 seconds</td>
                <td>9 min 54 seconds</td>
                <td>10 min 33 seconds</td>
              </tr>
              <tr valign="top">
                <td>5</td>
                <td>100.0</td>
                <td>45</td>
                <td>6 min 26 seconds</td>
                <td>3 min 13 seconds</td>
                <td>15 min 24 seconds</td>
                <td>10 min 39 seconds</td>
              </tr>
              <tr valign="top">
                <td>10</td>
                <td>200.0</td>
                <td>45</td>
                <td>12 min 45 seconds</td>
                <td>3 min 12 seconds</td>
                <td>30 min 42 seconds</td>
                <td>10 min 51 seconds</td>
              </tr>
              <tr valign="top">
                <td>15</td>
                <td>300.0</td>
                <td>45</td>
                <td>19 min 5 seconds</td>
                <td>3 min 13 seconds</td>
                <td>45 min 54 seconds</td>
                <td>11 min 3 seconds</td>
              </tr>
              <tr valign="top">
                <td>20</td>
                <td>400.0</td>
                <td>45</td>
                <td>25 min 52 seconds</td>
                <td>3 min 13 seconds</td>
                <td>61 min 13 seconds</td>
                <td>11 min 17 seconds</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <table-wrap position="float" id="table7">
          <label>Table 7</label>
          <caption>
            <p>Scalability of the proposed protocol for different numbers of features (3 data providers).</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="130"/>
            <col width="160"/>
            <col width="130"/>
            <col width="140"/>
            <col width="140"/>
            <col width="150"/>
            <col width="150"/>
            <thead>
              <tr valign="top">
                <td>Features, n</td>
                <td>Size of ciphertexts, MB</td>
                <td>Iterations, n</td>
                <td>Training time (computation)</td>
                <td>Training time (transfer)</td>
                <td>Evaluation time (computation)</td>
                <td>Evaluation time (transfer)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>3</td>
                <td>60.0</td>
                <td>45</td>
                <td>4 min 16 seconds</td>
                <td>3 min 13 seconds</td>
                <td>9 min 54 seconds</td>
                <td>10 min 33 seconds</td>
              </tr>
              <tr valign="top">
                <td>5</td>
                <td>100.0</td>
                <td>45</td>
                <td>8 min 30 seconds</td>
                <td>6 min 23 seconds</td>
                <td>10 min 22 seconds</td>
                <td>10 min 53 seconds</td>
              </tr>
              <tr valign="top">
                <td>10</td>
                <td>200.0</td>
                <td>45</td>
                <td>12 min 48 seconds</td>
                <td>9 min 37 seconds</td>
                <td>10 min 47 seconds</td>
                <td>11 min 13 seconds</td>
              </tr>
              <tr valign="top">
                <td>15</td>
                <td>300.0</td>
                <td>45</td>
                <td>16 min 54 seconds</td>
                <td>12 min 50 seconds</td>
                <td>11 min 16 seconds</td>
                <td>11 min 32 seconds</td>
              </tr>
              <tr valign="top">
                <td>20</td>
                <td>400.0</td>
                <td>45</td>
                <td>21 min 13 seconds</td>
                <td>16 min 10 seconds</td>
                <td>11 min 40 seconds</td>
                <td>11 min 53 seconds</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>As researchers cannot obtain unencrypted research data, they may have difficulty choosing the proper hyperparameters, especially the learning rate. Despite a slightly broader range of learning rate selection, the setting of the learning rate is still very important in our privacy-preserving multicenter logistic regression protocol because compared with the nonsecure protocol, our protocol still has a considerable time cost. In our proposed protocol, interactions exist among the service provider, the data providers, and the researcher, allowing the researcher to obtain the plaintext model parameters in every iteration. As a result, the researcher can easily judge whether the hyperparameters are set properly according to the trend of the model parameters. Moreover, the researcher can halt the model training in the early stages, which results in less waste of computational resources. However, to implement the web-based protocol, clients must be installed on all the data providers’ and researchers’ machines, which must be kept online during the entire process of model training and model evaluation, leading to an additional consumption of network bandwidth.</p>
        <p>There is a trade-off between computation and transfer consumption in our protocol. Although some solutions use fully homomorphic encryption to avoid decryption during model training [<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref15">15</xref>], our proposed protocol uses somewhat homomorphic encryption for several reasons. First, to support an unlimited number of operations, a bootstrapping process is required, which is very time consuming. More time is consumed in threshold homomorphic encryption because we must select larger encryption parameters to accommodate not only the greater noise in the combined public and relinearization keys but also the greater smudging noise during decryption. Second, to avoid decryption, fixed-point arithmetic operations without a rounding process are required. Bonte and Vercauteren [<xref ref-type="bibr" rid="ref14">14</xref>] use nonintegral base nonadjacent form with window size <italic>ω</italic> to encode a real number as a polynomial, which may affect the use of CRT batching (the most important optimization technique in our protocol), whereas Chen et al [<xref ref-type="bibr" rid="ref15">15</xref>] use the Cheon-Kim-Kim-Song (CKKS) [<xref ref-type="bibr" rid="ref39">39</xref>] scheme, which is also based on RLWE and naturally supports floating-point approximate arithmetic operations. However, in the CKKS scheme, the decryption result contains noise, meaning that in the threshold variant of the CKKS scheme, we must set a very high value for the encryption parameter <italic>scale</italic> to avoid destruction of the plaintext by the smudging noise, which greatly reduces the multiplicative depth of the circuit.</p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>Our proposed protocol has a few limitations. First, to make the privacy-preserving logistic regression realistic, this protocol requires a high-speed and stable network. Second, as the BFV scheme is based on integers, before encryption, all floating-point numbers must be scaled up and rounded to integers. A larger SF can support a higher level of precision but will also result in higher computation and storage costs for a given security level. Third, in a real-world scenario, a single patient may have multiple medical records across different data providers, which rarely occurs when data providers are far apart but is not uncommon when data providers are located in the same region (eg, a city). Therefore, in the latter case, further research on privacy-preserving identification and deduplication is required to ensure that no duplicate medical records affect the analysis results. Furthermore, this study mainly focuses on technical issues and thus does not delve into matters related to ethics and law, which are also very important in multiparty medical research.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>In this paper, we propose the first privacy-preserving multiparty logistic regression model training and evaluation protocol based on threshold homomorphic encryption. We conduct experiments in simulated real-life scenarios, and the results demonstrate that the proposed protocol is practical for real-world use. We believe that our work can help medical institutions eliminate privacy leakage concerns during data sharing, promote multicenter medical research, and thus improve the use of medical data to some extent.</p>
        <p>In the future, we will extend our tools to be more practical. As the BFV homomorphic encryption scheme does not have indistinguishability under chosen ciphertext attack security, additional security technology, such as hashing, should be integrated into the tools to prevent malicious attackers from tampering with the ciphertexts. More privacy-preserving statistics and machine learning methods will be added to our tools to considerably enhance flexibility in secure multicenter research. Furthermore, we will improve the efficiency of our tools using graphics processing unit or field programmable gate array acceleration.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Details of the biomedical data used, details of Brakerski/Fan-Vercauteren (BFV) threshold homomorphic encryption, security analysis, and noise analysis.</p>
        <media xlink:href="jmir_v22i12e22555_app1.docx" xlink:title="DOCX File, 38 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">BFV</term>
          <def>
            <p>Brakerski/Fan-Vercauteren</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">CKKS</term>
          <def>
            <p>Cheon-Kim-Kim-Song</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">CRT</term>
          <def>
            <p>Chinese remainder theorem</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">FN</term>
          <def>
            <p>false negative</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">FP</term>
          <def>
            <p>false positive</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">NTT</term>
          <def>
            <p>number theoretic transform</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">RLWE</term>
          <def>
            <p>ring learning with errors</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">SF</term>
          <def>
            <p>scaling factor</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">SGX</term>
          <def>
            <p>software guard extensions</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb10">SIMD</term>
          <def>
            <p>single instruction, multiple data</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb11">TN</term>
          <def>
            <p>true negative</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb12">TP</term>
          <def>
            <p>true positive</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>This work was supported by the National Natural Science Foundation of China (under Grants 81771936 and 81801796), the Major Scientific Project of Zhejiang Laboratory (under Grant 2018DG0ZX01), the National Key Research and Development Program of China (under Grant 2018YFC0116901), and the Fundamental Research Funds for the Central Universities, China (No. 2020QNA5031).</p>
    </ack>
    <fn-group>
      <fn fn-type="con">
        <p>The study concept and design were given by YL and TZ. Implementation and experiments of the study were carried out by YL. Drafting of the manuscript was carried out by YL and YT. Discussion, critical revision, and final approval of the version to be published were performed by JL, SZ, YT, TZ, and YL.</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chung</surname>
              <given-names>KC</given-names>
            </name>
            <name name-style="western">
              <surname>Song</surname>
              <given-names>JW</given-names>
            </name>
            <collab>WRIST Study Group</collab>
          </person-group>
          <article-title>A guide to organizing a multicenter clinical trial</article-title>
          <source>Plast Reconstr Surg</source>
          <year>2010</year>
          <month>08</month>
          <volume>126</volume>
          <issue>2</issue>
          <fpage>515</fpage>
          <lpage>23</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/20375760"/>
          </comment>
          <pub-id pub-id-type="doi">10.1097/PRS.0b013e3181df64fa</pub-id>
          <pub-id pub-id-type="medline">20375760</pub-id>
          <pub-id pub-id-type="pmcid">PMC2917608</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Downey</surname>
              <given-names>AS</given-names>
            </name>
            <name name-style="western">
              <surname>Olson</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <source>Sharing Clinical Research Data: Workshop Summary</source>
          <year>2013</year>
          <publisher-loc>Washington, DC</publisher-loc>
          <publisher-name>National Academies Press</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dirnagl</surname>
              <given-names>U</given-names>
            </name>
            <name name-style="western">
              <surname>Fisher</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>International, multicenter randomized preclinical trials in translational stroke research: it's time to act</article-title>
          <source>J Cereb Blood Flow Metab</source>
          <year>2012</year>
          <month>06</month>
          <volume>32</volume>
          <issue>6</issue>
          <fpage>933</fpage>
          <lpage>5</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/22510602"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/jcbfm.2012.51</pub-id>
          <pub-id pub-id-type="medline">22510602</pub-id>
          <pub-id pub-id-type="pii">jcbfm201251</pub-id>
          <pub-id pub-id-type="pmcid">PMC3367233</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="web">
          <article-title>How are Healthcare Data Breach Victims Affected by Attacks?</article-title>
          <source>HealthITSecurity</source>
          <access-date>2020-06-17</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://healthitsecurity.com/news/how-are-healthcare-data-breach-victims-affected-by-attacks">https://healthitsecurity.com/news/how-are-healthcare-data-breach-victims-affected-by-attacks</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Skinner</surname>
              <given-names>ER</given-names>
            </name>
            <name name-style="western">
              <surname>Barnett</surname>
              <given-names>GO</given-names>
            </name>
            <name name-style="western">
              <surname>Singer</surname>
              <given-names>DE</given-names>
            </name>
            <name name-style="western">
              <surname>Mulley</surname>
              <given-names>AG</given-names>
            </name>
            <name name-style="western">
              <surname>Lew</surname>
              <given-names>RA</given-names>
            </name>
            <name name-style="western">
              <surname>Stickler</surname>
              <given-names>SA</given-names>
            </name>
            <name name-style="western">
              <surname>Morgan</surname>
              <given-names>MM</given-names>
            </name>
            <name name-style="western">
              <surname>Thibault</surname>
              <given-names>GE</given-names>
            </name>
          </person-group>
          <article-title>The use of logistic regression in diagnostic and prognostic prediction in a medical intensive care unit</article-title>
          <source>Proc Annu Symp Comput Appl Med Care</source>
          <year>1980</year>
          <month>11</month>
          <volume>1</volume>
          <fpage>222</fpage>
          <lpage>7</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2203702/"/>
          </comment>
          <pub-id pub-id-type="pmcid">PMC2203702</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Abdolmaleki</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Yarmohammadi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Gity</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Comparison of logistic regression and neural network models in predicting the outcome of biopsy in breast cancer from MRI findings</article-title>
          <source>Iran J Radiat Res</source>
          <year>2004</year>
          <volume>1</volume>
          <issue>4</issue>
          <fpage>217</fpage>
          <lpage>28</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://ijrr.com/article-1-33-en.html"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Ohno-Machado</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Grid Binary LOgistic REgression (GLORE): building shared models without sharing data</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2012</year>
          <volume>19</volume>
          <issue>5</issue>
          <fpage>758</fpage>
          <lpage>64</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/22511014"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/amiajnl-2012-000862</pub-id>
          <pub-id pub-id-type="medline">22511014</pub-id>
          <pub-id pub-id-type="pii">amiajnl-2012-000862</pub-id>
          <pub-id pub-id-type="pmcid">PMC3422844</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Cui</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Cheng</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Ohno-Machado</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>EXpectation Propagation LOgistic REgRession (EXPLORER): distributed privacy-preserving online model learning</article-title>
          <source>J Biomed Inform</source>
          <year>2013</year>
          <month>06</month>
          <volume>46</volume>
          <issue>3</issue>
          <fpage>480</fpage>
          <lpage>96</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(13)00042-7"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2013.03.008</pub-id>
          <pub-id pub-id-type="medline">23562651</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(13)00042-7</pub-id>
          <pub-id pub-id-type="pmcid">PMC3676314</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Xue</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Ohno-Machado</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>X</given-names>
            </name>
          </person-group>
          <article-title>WebGLORE: a web service for Grid LOgistic REgression</article-title>
          <source>Bioinformatics</source>
          <year>2013</year>
          <month>12</month>
          <day>15</day>
          <volume>29</volume>
          <issue>24</issue>
          <fpage>3238</fpage>
          <lpage>40</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/24072732"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/bioinformatics/btt559</pub-id>
          <pub-id pub-id-type="medline">24072732</pub-id>
          <pub-id pub-id-type="pii">btt559</pub-id>
          <pub-id pub-id-type="pmcid">PMC3842761</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Aono</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Hayashi</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Phong</surname>
              <given-names>LT</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Scalable and Secure Logistic Regression via Homomorphic Encryption</article-title>
          <source>Proceedings of the Sixth ACM Conference on Data and Application Security and Privacy</source>
          <year>2016</year>
          <conf-name>CODASPY'16</conf-name>
          <conf-date>Mar 9-11, 2016</conf-date>
          <conf-loc>New Orleans</conf-loc>
          <fpage>142</fpage>
          <lpage>4</lpage>
          <pub-id pub-id-type="doi">10.1145/2857705.2857731</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Aono</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Hayashi</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Phong</surname>
              <given-names>LT</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Privacy-preserving logistic regression with distributed data sources via homomorphic encryption</article-title>
          <source>IEICE Trans Inf Syst</source>
          <year>2016</year>
          <month>08</month>
          <volume>E99.D</volume>
          <issue>8</issue>
          <fpage>2079</fpage>
          <lpage>89</lpage>
          <pub-id pub-id-type="doi">10.1587/transinf.2015inp0020</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cheon</surname>
              <given-names>JH</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Song</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Ensemble method for privacy-preserving logistic regression based on homomorphic encryption</article-title>
          <source>IEEE Access</source>
          <year>2018</year>
          <month>08</month>
          <volume>6</volume>
          <issue>8</issue>
          <fpage>46938</fpage>
          <lpage>48</lpage>
          <pub-id pub-id-type="doi">10.1109/access.2018.2866697</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Song</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Xia</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>X</given-names>
            </name>
          </person-group>
          <article-title>Secure logistic regression based on homomorphic encryption: design and evaluation</article-title>
          <source>JMIR Med Inform</source>
          <year>2018</year>
          <month>04</month>
          <day>17</day>
          <volume>6</volume>
          <issue>2</issue>
          <fpage>e19</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medinform.jmir.org/2018/2/e19/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/medinform.8805</pub-id>
          <pub-id pub-id-type="medline">29666041</pub-id>
          <pub-id pub-id-type="pii">v6i2e19</pub-id>
          <pub-id pub-id-type="pmcid">PMC5930176</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bonte</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Vercauteren</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Privacy-preserving logistic regression training</article-title>
          <source>BMC Med Genomics</source>
          <year>2018</year>
          <month>10</month>
          <day>11</day>
          <volume>11</volume>
          <issue>Suppl 4</issue>
          <fpage>86</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedgenomics.biomedcentral.com/articles/10.1186/s12920-018-0398-y"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12920-018-0398-y</pub-id>
          <pub-id pub-id-type="medline">30309364</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12920-018-0398-y</pub-id>
          <pub-id pub-id-type="pmcid">PMC6180357</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Gilad-Bachrach</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Han</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Jalali</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Laine</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Lauter</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Logistic regression over encrypted data from fully homomorphic encryption</article-title>
          <source>BMC Med Genomics</source>
          <year>2018</year>
          <month>10</month>
          <day>11</day>
          <volume>11</volume>
          <issue>Suppl 4</issue>
          <fpage>81</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedgenomics.biomedcentral.com/articles/10.1186/s12920-018-0397-z"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12920-018-0397-z</pub-id>
          <pub-id pub-id-type="medline">30309350</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12920-018-0397-z</pub-id>
          <pub-id pub-id-type="pmcid">PMC6180402</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Song</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Cheon</surname>
              <given-names>JH</given-names>
            </name>
          </person-group>
          <article-title>Logistic regression model training based on the approximate homomorphic encryption</article-title>
          <source>BMC Med Genomics</source>
          <year>2018</year>
          <month>10</month>
          <day>11</day>
          <volume>11</volume>
          <issue>Suppl 4</issue>
          <fpage>83</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedgenomics.biomedcentral.com/articles/10.1186/s12920-018-0401-7"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12920-018-0401-7</pub-id>
          <pub-id pub-id-type="medline">30309349</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12920-018-0401-7</pub-id>
          <pub-id pub-id-type="pmcid">PMC6180367</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Han</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Hong</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Cheon</surname>
              <given-names>JH</given-names>
            </name>
            <name name-style="western">
              <surname>Park</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Logistic Regression on Homomorphic Encrypted Data at Scale</article-title>
          <source>Proceedings of the Thirty-First Innovative Applications of Artificial Intelligence Conference</source>
          <year>2019</year>
          <conf-name>IAAI'19</conf-name>
          <conf-date>January 28–30, 2019</conf-date>
          <conf-loc>Honolulu</conf-loc>
          <fpage>9466</fpage>
          <lpage>71</lpage>
          <pub-id pub-id-type="doi">10.1609/aaai.v33i01.33019466</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>El Emam</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Samet</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Arbuckle</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Tamblyn</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Earle</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Kantarcioglu</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>A secure distributed logistic regression protocol for the detection of rare adverse drug events</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2013</year>
          <month>05</month>
          <day>1</day>
          <volume>20</volume>
          <issue>3</issue>
          <fpage>453</fpage>
          <lpage>61</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/22871397"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/amiajnl-2011-000735</pub-id>
          <pub-id pub-id-type="medline">22871397</pub-id>
          <pub-id pub-id-type="pii">amiajnl-2011-000735</pub-id>
          <pub-id pub-id-type="pmcid">PMC3628043</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Hamer</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Song</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Xia</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Mohammed</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Sadat</surname>
              <given-names>MN</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>SecureLR: secure logistic regression model via a hybrid cryptographic protocol</article-title>
          <source>IEEE/ACM Trans Comput Biol Bioinform</source>
          <year>2019</year>
          <volume>16</volume>
          <issue>1</issue>
          <fpage>113</fpage>
          <lpage>23</lpage>
          <pub-id pub-id-type="doi">10.1109/TCBB.2018.2833463</pub-id>
          <pub-id pub-id-type="medline">29994005</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Xiao</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Lai</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>SgxPectre: stealing Intel secrets from SGX enclaves via speculative execution</article-title>
          <source>IEEE Secur Privacy</source>
          <year>2020</year>
          <month>05</month>
          <volume>18</volume>
          <issue>3</issue>
          <fpage>28</fpage>
          <lpage>37</lpage>
          <pub-id pub-id-type="doi">10.1109/msec.2019.2963021</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>López-Alt</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Tromer</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Vaikuntanathan</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>On-the-fly Multiparty Computation on the Cloud via Multikey Fully Homomorphic Encryption</article-title>
          <source>Proceedings of the Forty-Fourth Annual ACM Symposium on Theory of Computing</source>
          <year>2012</year>
          <conf-name>STOC'12</conf-name>
          <conf-date>May 11-14, 2012</conf-date>
          <conf-loc>New York, USA</conf-loc>
          <fpage>1219</fpage>
          <lpage>34</lpage>
          <pub-id pub-id-type="doi">10.1145/2213977.2214086</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mouchet</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Troncoso-Pastoriza</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Hubaux</surname>
              <given-names>JP</given-names>
            </name>
          </person-group>
          <article-title>Computing across trust boundaries using distributed homomorphic cryptography</article-title>
          <source>IACR Cryptol ePrint Arch</source>
          <year>2019</year>
          <volume>2019</volume>
          <fpage>961</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://eprint.iacr.org/2019/961.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hripcsak</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Duke</surname>
              <given-names>JD</given-names>
            </name>
            <name name-style="western">
              <surname>Shah</surname>
              <given-names>NH</given-names>
            </name>
            <name name-style="western">
              <surname>Reich</surname>
              <given-names>CG</given-names>
            </name>
            <name name-style="western">
              <surname>Huser</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Schuemie</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Suchard</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Park</surname>
              <given-names>RW</given-names>
            </name>
            <name name-style="western">
              <surname>Wong</surname>
              <given-names>ICK</given-names>
            </name>
            <name name-style="western">
              <surname>Rijnbeek</surname>
              <given-names>PR</given-names>
            </name>
            <name name-style="western">
              <surname>van der Lei</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Pratt</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Norén</surname>
              <given-names>GN</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Stang</surname>
              <given-names>PE</given-names>
            </name>
            <name name-style="western">
              <surname>Madigan</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Ryan</surname>
              <given-names>PB</given-names>
            </name>
          </person-group>
          <article-title>Observational health data sciences and informatics (OHDSI): opportunities for observational researchers</article-title>
          <source>Stud Health Technol Inform</source>
          <year>2015</year>
          <volume>216</volume>
          <fpage>574</fpage>
          <lpage>8</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/26262116"/>
          </comment>
          <pub-id pub-id-type="medline">26262116</pub-id>
          <pub-id pub-id-type="pmcid">PMC4815923</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Paverd</surname>
              <given-names>AJ</given-names>
            </name>
            <name name-style="western">
              <surname>Martin</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Brown</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>Modelling and Automatically Analysing Privacy Properties for Honest-but-Curious Adversaries</article-title>
          <source>University of Oxford</source>
          <access-date>2020-06-17</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.cs.ox.ac.uk/people/andrew.paverd/casper/casper-privacy-report.pdf">https://www.cs.ox.ac.uk/people/andrew.paverd/casper/casper-privacy-report.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fan</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Vercauteren</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Somewhat Practical Fully Homomorphic Encryption</article-title>
          <source>IACR Cryptol ePrint Arch</source>
          <year>2012</year>
          <volume>2012</volume>
          <fpage>144</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://eprint.iacr.org/2012/144"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Brakerski</surname>
              <given-names>Z</given-names>
            </name>
          </person-group>
          <article-title>Fully Homomorphic Encryption without Modulus Switching from Classical GapSVP</article-title>
          <source>Advances in Cryptology</source>
          <year>2012</year>
          <conf-name>CRYPTO'12</conf-name>
          <conf-date>August 19-23, 2012</conf-date>
          <conf-loc>Santa Barbara</conf-loc>
          <fpage>868</fpage>
          <lpage>86</lpage>
          <pub-id pub-id-type="doi">10.1007/978-3-642-32009-5_50</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Paillier</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Public-Key Cryptosystems Based on Composite Degree Residuosity Classes</article-title>
          <source>Advances in Cryptology</source>
          <year>1999</year>
          <conf-name>EUROCRYPT'99</conf-name>
          <conf-date>May 2–6, 1999</conf-date>
          <conf-loc>Prague</conf-loc>
          <fpage>223</fpage>
          <lpage>38</lpage>
          <pub-id pub-id-type="doi">10.1007/3-540-48910-x_16</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Smart</surname>
              <given-names>NP</given-names>
            </name>
            <name name-style="western">
              <surname>Vercauteren</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Fully homomorphic SIMD operations</article-title>
          <source>Des Codes Cryptogr</source>
          <year>2012</year>
          <month>07</month>
          <day>4</day>
          <volume>71</volume>
          <issue>1</issue>
          <fpage>57</fpage>
          <lpage>81</lpage>
          <pub-id pub-id-type="doi">10.1007/s10623-012-9720-4</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bajard</surname>
              <given-names>JC</given-names>
            </name>
            <name name-style="western">
              <surname>Eynard</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Hasan</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Zucca</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>A Full RNS Variant of FV Like Somewhat Homomorphic Encryption Schemes</article-title>
          <source>Selected Areas in Cryptography</source>
          <year>2016</year>
          <conf-name>SAC'16</conf-name>
          <conf-date>August 10-12, 2016</conf-date>
          <conf-loc>St. John's</conf-loc>
          <fpage>423</fpage>
          <lpage>42</lpage>
          <pub-id pub-id-type="doi">10.1007/978-3-319-69453-5_23</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Armknecht</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Boyd</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Carr</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Gjøsteen</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Jäschke</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Reuter</surname>
              <given-names>CA</given-names>
            </name>
            <name name-style="western">
              <surname>Strand</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>A guide to fully homomorphic encryption</article-title>
          <source>IACR Cryptol ePrint Arch</source>
          <year>2015</year>
          <volume>2015</volume>
          <fpage>1192</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://eprint.iacr.org/2015/1192.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ypma</surname>
              <given-names>TJ</given-names>
            </name>
          </person-group>
          <article-title>Historical development of the Newton–Raphson method</article-title>
          <source>SIAM Rev</source>
          <year>1995</year>
          <month>12</month>
          <volume>37</volume>
          <issue>4</issue>
          <fpage>531</fpage>
          <lpage>51</lpage>
          <pub-id pub-id-type="doi">10.1137/1037125</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tian</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Shang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Tong</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Chi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Kong</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Ding</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>POPCORN: a web service for individual PrognOsis prediction based on multi-center clinical data CollabORatioN without patient-level data sharing</article-title>
          <source>J Biomed Inform</source>
          <year>2018</year>
          <month>10</month>
          <volume>86</volume>
          <fpage>1</fpage>
          <lpage>14</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(18)30163-1"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2018.08.008</pub-id>
          <pub-id pub-id-type="medline">30103028</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(18)30163-1</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="web">
          <article-title>Breast Cancer</article-title>
          <source>UCI Machine Learning Repository: Data Sets</source>
          <access-date>2020-06-17</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://archive.ics.uci.edu/ml/datasets/Breast+Cancer">https://archive.ics.uci.edu/ml/datasets/Breast+Cancer</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Albrecht</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Chase</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Ding</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Goldwasser</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Gorbunov</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Halevi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Hoffstein</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Lauter</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Lokam</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Micciancio</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Moody</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Morrison</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Sahai</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Vaikuntanathan</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>Homomorphic Encryption Standard</article-title>
          <source>Homomorphic Encryption Standardization</source>
          <access-date>2020-06-17</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://homomorphicencryption.org/wp-content/uploads/2018/08/HomomorphicEncryptionStandard2018-08-30.pdf">http://homomorphicencryption.org/wp-content/uploads/2018/08/HomomorphicEncryptionStandard2018-08-30.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>LR-THE</article-title>
          <source>GitHub</source>
          <access-date>2020-09-12</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://github.com/luyao2211/LR-THE">https://github.com/luyao2211/LR-THE</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Laine</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Simple Encrypted Arithmetic Library 2.3.1</article-title>
          <source>Microsoft</source>
          <access-date>2020-06-17</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.microsoft.com/en-us/research/uploads/prod/2017/11/sealmanual-2-3-1.pdf">https://www.microsoft.com/en-us/research/uploads/prod/2017/11/sealmanual-2-3-1.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Goldreich</surname>
              <given-names>O</given-names>
            </name>
          </person-group>
          <source>Foundations of Cryptography</source>
          <year>2004</year>
          <publisher-loc>New York, USA</publisher-loc>
          <publisher-name>Cambridge University Press</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cheon</surname>
              <given-names>JH</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Song</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Homomorphic Encryption for Arithmetic of Approximate Numbers</article-title>
          <source>Advances in Cryptology</source>
          <year>2017</year>
          <conf-name>ASIACRYPT'17</conf-name>
          <conf-date>December 3-7, 2017</conf-date>
          <conf-loc>Hong Kong</conf-loc>
          <fpage>409</fpage>
          <lpage>37</lpage>
          <pub-id pub-id-type="doi">10.1007/978-3-319-70694-8_15</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lindell</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>How to simulate it – a tutorial on the simulation proof technique</article-title>
          <source>Tutorials on the Foundations of Cryptography</source>
          <year>2017</year>
          <month>04</month>
          <day>6</day>
          <publisher-loc>Cham, Switzerland</publisher-loc>
          <publisher-name>Springer</publisher-name>
          <fpage>277</fpage>
          <lpage>346</lpage>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
