<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMIR</journal-id>
      <journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id>
      <journal-title>Journal of Medical Internet Research</journal-title>
      <issn pub-type="epub">1438-8871</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v23i8e27235</article-id>
      <article-id pub-id-type="pmid">34236336</article-id>
      <article-id pub-id-type="doi">10.2196/27235</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Real-Time Respiratory Tumor Motion Prediction Based on a Temporal Convolutional Neural Network: Prediction Model Development Study</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Kukafka</surname>
            <given-names>Rita</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Li</surname>
            <given-names>Bing</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Kim</surname>
            <given-names>Namkug</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Chang</surname>
            <given-names>Panchun</given-names>
          </name>
          <degrees>MA</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-0909-638X</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Dang</surname>
            <given-names>Jun</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-9077-6544</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Dai</surname>
            <given-names>Jianrong</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-3249-440X</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Sun</surname>
            <given-names>Wenzheng</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff4" ref-type="aff">4</xref>
          <address>
            <institution>Department of Radiation Oncology, School of Medicine</institution>
            <institution>The Second Affiliated Hospital</institution>
            <institution>Zhejiang University</institution>
            <addr-line>88 Jiefang Road</addr-line>
            <addr-line>Hangzhou, 310009</addr-line>
            <country>China</country>
            <phone>86 057187783538</phone>
            <email>sunwenzheng@zju.edu.cn</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-2629-744X</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Department of Oncology</institution>
        <institution>The First Affiliated Hospital of Chongqing Medical University</institution>
        <addr-line>Chongqing</addr-line>
        <country>China</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>School of Physics and Electronics</institution>
        <institution>Shandong Normal University</institution>
        <addr-line>Jinan</addr-line>
        <country>China</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Department of Radiation Oncology</institution>
        <institution>National Cancer Center/National Clinical Research Center for Cancer/Cancer Hospital</institution>
        <institution>Chinese Academy of Medical Sciences and Peking Union Medical College</institution>
        <addr-line>Beijing</addr-line>
        <country>China</country>
      </aff>
      <aff id="aff4">
        <label>4</label>
        <institution>Department of Radiation Oncology, School of Medicine</institution>
        <institution>The Second Affiliated Hospital</institution>
        <institution>Zhejiang University</institution>
        <addr-line>Hangzhou</addr-line>
        <country>China</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Wenzheng Sun <email>sunwenzheng@zju.edu.cn</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>8</month>
        <year>2021</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>27</day>
        <month>8</month>
        <year>2021</year>
      </pub-date>
      <volume>23</volume>
      <issue>8</issue>
      <elocation-id>e27235</elocation-id>
      <history>
        <date date-type="received">
          <day>18</day>
          <month>1</month>
          <year>2021</year>
        </date>
        <date date-type="rev-request">
          <day>17</day>
          <month>3</month>
          <year>2021</year>
        </date>
        <date date-type="rev-recd">
          <day>4</day>
          <month>6</month>
          <year>2021</year>
        </date>
        <date date-type="accepted">
          <day>5</day>
          <month>7</month>
          <year>2021</year>
        </date>
      </history>
      <copyright-statement>©Panchun Chang, Jun Dang, Jianrong Dai, Wenzheng Sun. Originally published in the Journal of Medical Internet Research (https://www.jmir.org), 27.08.2021.</copyright-statement>
      <copyright-year>2021</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research, is properly cited. The complete bibliographic information, a link to the original publication on https://www.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://www.jmir.org/2021/8/e27235" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>The dynamic tracking of tumors with radiation beams in radiation therapy requires the prediction of real-time target locations prior to beam delivery, as treatment involving radiation beams and gating tracking results in time latency.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>In this study, a deep learning model that was based on a temporal convolutional neural network was developed to predict internal target locations by using multiple external markers.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>Respiratory signals from 69 treatment fractions of 21 patients with cancer who were treated with the CyberKnife Synchrony device (Accuray Incorporated) were used to train and test the model. The reported model’s performance was evaluated by comparing the model to a long short-term memory model in terms of the root mean square errors (RMSEs) of real and predicted respiratory signals. The effect of the number of external markers was also investigated.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>The average RMSEs of predicted (ahead time=400 ms) respiratory motion in the superior-inferior, anterior-posterior, and left-right directions and in 3D space were 0.49 mm, 0.28 mm, 0.25 mm, and 0.67 mm, respectively.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>The experiment results demonstrated that the temporal convolutional neural network–based respiratory prediction model could predict respiratory signals with submillimeter accuracy.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>radiation therapy</kwd>
        <kwd>temporal convolutional neural network</kwd>
        <kwd>respiratory signal prediction</kwd>
        <kwd>neural network</kwd>
        <kwd>deep learning model</kwd>
        <kwd>dynamic tracking</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>The aim of radiation therapy is not only to deliver lethal doses of radiation to target tumors but also to minimize the dose of unnecessary radiation delivered to the surrounding healthy tissues and structures [<xref ref-type="bibr" rid="ref1">1</xref>-<xref ref-type="bibr" rid="ref5">5</xref>]. Modern technical advances, such as intensity-modulated radiation therapy, have improved the accuracy of dose delivery. However, some targets, such as lung cancer and liver cancer tumors, may move substantially during the treatment delivery process due to respiratory motion [<xref ref-type="bibr" rid="ref6">6</xref>-<xref ref-type="bibr" rid="ref10">10</xref>]. Investigators have reported that lung and liver tumors can move up to 3 cm during a conventional radiation therapy treatment session [<xref ref-type="bibr" rid="ref11">11</xref>,<xref ref-type="bibr" rid="ref12">12</xref>]. The motion of targets may substantially decrease the accuracy and efficiency of intensity-modulated radiation therapy or other advanced technologies.</p>
      <p>Many methods have been investigated to reduce the effect of respiratory motion, which mainly include the following:</p>
      <list list-type="bullet">
        <list-item>
          <p>Adding a margin around the target tumor: a 10- to 15-mm margin is always used as the radiation treatment field to avoid missing a tumor, which may result in unnecessary radiation exposure to healthy tissues and structures [<xref ref-type="bibr" rid="ref13">13</xref>].</p>
        </list-item>
        <list-item>
          <p>Breath hold: patients need to hold their breath during the treatment to temporarily stop respiration, but this is not applicable for some patients, such as older patients and juvenile patients [<xref ref-type="bibr" rid="ref14">14</xref>].</p>
        </list-item>
        <list-item>
          <p>Beam tracking: radiation beams track a moving tumor dynamically to ensure that the tumor target is constantly within the treatment field [<xref ref-type="bibr" rid="ref15">15</xref>].</p>
        </list-item>
      </list>
      <p>All beam tracking methods must compensate for the latency of various sources, such as latencies from beam adjustment and image capture times [<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref16">16</xref>]. Hence, we must estimate the position of targets in advance to compensate for latency effects.</p>
      <p>Recently, deep learning approaches based on long short-term memory (LSTM) have been successfully used to solve time series prediction problems in several fields. For example, Ma et al [<xref ref-type="bibr" rid="ref17">17</xref>] used an LSTM model to capture traffic dynamics data for predicting short-term traffic speed. Bao et al [<xref ref-type="bibr" rid="ref18">18</xref>] implemented an LSTM model to predict the one-step-ahead price (closing) of 6 stock indices for various financial markets. Lin et al [<xref ref-type="bibr" rid="ref19">19</xref>] used an LSTM model to predict respiratory signals. Moreover, some recent studies have demonstrated that certain temporal convolutional neural network (TCN) architectures could achieve state-of-the-art accuracy in time series prediction problems [<xref ref-type="bibr" rid="ref20">20</xref>-<xref ref-type="bibr" rid="ref23">23</xref>]. However, to our knowledge, there are no studies on using a TCN model to predict respiratory tumor motion. Hence, in this study, we developed a TCN-based respiratory prediction model by using external markers and compared the prediction performance of the TCN to that of an LSTM model. We also investigated the effect that the number of external markers had on prediction performance.</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Data Acquisition</title>
        <p>The tumor motion data (69 treatment fractions of 21 patients) used in this study were obtained from an open data set, which was recorded by the CyberKnife Synchrony (Accuray Incorporated) tracking system with a recorded sampling rate of 25 Hz [<xref ref-type="bibr" rid="ref24">24</xref>]. To analyze the external movements of patients, charge-coupled device cameras were used to monitor the luminous diodes located on a patient's abdomen and chest. To analyze internal fiducial positions, orthogonal diagnostic x-ray systems were used to observe implanted markers periodically.</p>
      </sec>
      <sec>
        <title>Prediction Process</title>
        <p>The general scheme for the prediction process of 2 models is outlined in <xref rid="figure1" ref-type="fig">Figure 1</xref>, and the arrangement of the respiratory signals that were used for network training and validation is shown in <xref ref-type="table" rid="table1">Table 1</xref>. Each recorded position (internal tumor and external marker positions) was stratified into 2 cohorts based on time t<sub>s</sub>. The positions prior to time t<sub>s</sub> (the training signals) were used to train the TCN and LSTM models. The positions after t<sub>s</sub> (the testing signals) were used to evaluate the developed model.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Flowchart of the prediction algorithm.</p>
          </caption>
          <graphic xlink:href="jmir_v23i8e27235_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>The arrangement of respiratory signals used for network training and validation.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="470"/>
            <col width="0"/>
            <col width="250"/>
            <col width="0"/>
            <col width="250"/>
            <thead>
              <tr valign="top">
                <td colspan="3">Position type</td>
                <td colspan="2">Data for training</td>
                <td>Data for validation</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="6">
                  <bold>Inputs of the network</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Position of marker 1</td>
                <td colspan="2">M<sup>a</sup>1<sub>SI</sub><sup>b</sup>, <sub>AP</sub><sup>c</sup>, <sub>LR</sub><sup>d</sup> (1, 2,…, t<sub>s</sub>)</td>
                <td colspan="2">M1<sub>SI, AP, LR</sub> (t<sub>s</sub>+1, t<sub>s</sub>+2,…, t<sub>s</sub>+t<sub>end</sub>)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Position of marker 2</td>
                <td colspan="2">M2<sub>SI, AP, LR</sub> (1, 2,…, t<sub>s</sub>)</td>
                <td colspan="2">M2<sub>SI, AP, LR</sub> (t<sub>s</sub>+1, t<sub>s</sub>+2,…, t<sub>s</sub>+t<sub>end</sub>)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Position of marker 3</td>
                <td colspan="2">M3<sub>SI, AP, LR</sub> (1, 2,…, t<sub>s</sub>)</td>
                <td colspan="2">M3<sub>SI, AP, LR</sub> (t<sub>s</sub>+1, t<sub>s</sub>+2,…, t<sub>s</sub>+t<sub>end</sub>)</td>
              </tr>
              <tr valign="top">
                <td colspan="6">
                  <bold>Targets of the network</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Position of a tumor</td>
                <td colspan="2">T<sup>e</sup><sub>SI, AP, LR</sub> (1, 2,…, t<sub>s</sub>)</td>
                <td colspan="2">T<sub>SI, AP, LR</sub> (t<sub>s</sub>+1, t<sub>s</sub>+2,…, t<sub>s</sub>+t<sub>end</sub>)</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>M: external marker position.</p>
            </fn>
            <fn id="table1fn2">
              <p><sup>b</sup>SI: superior-inferior.</p>
            </fn>
            <fn id="table1fn3">
              <p><sup>c</sup>AP: anterior-posterior.</p>
            </fn>
            <fn id="table1fn4">
              <p><sup>d</sup>LR: left-right.</p>
            </fn>
            <fn id="table1fn5">
              <p><sup>e</sup>T: tumor position.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <p>For the training process, the training input data and prediction target data were first used to tune the hyperparameters, which was done by using a cross-validation model. Afterward, they were used to train the model. The external markers’ positions during the first input period of the training process (ie, the time between t=1 and t=t<sub>delay</sub>) were used as the training input data for predicting the tumor positions (target positions) at a specific time frame (t=t<sub>delay</sub>+t<sub>ahead</sub>). This training process was repeated and continued to predict the next tumor position until either the threshold of the cost function or the maximum iteration number, which was set in advance, was reached. Each pair of data points (ie, the input data, M[t+1,…, t+t<sub>delay</sub>], vs the output data, T[t+t<sub>delay</sub>+t<sub>ahead</sub>]) consisted of a training data set. “M” denoted 3 external markers’ positions (M1, M2, and M3), which were based on 3 directions (the superior-inferior, anterior-posterior, and left-right directions). t<sub>ahead</sub> represented the ahead time we needed for making predictions.</p>
        <p>For the evaluation process, the testing signals, which were represented as M(t<sub>s</sub>+1, t<sub>s</sub>+2,…, t<sub>end</sub>) and T(t<sub>s</sub>+1, t<sub>s</sub>+2,…, t<sub>s</sub>+t<sub>end</sub>), were used to evaluate the developed model. Similar to the process implemented in the training process, the external markers’ positions during the first input period of the evaluation process (ie, the time between t=t<sub>s</sub>+1 and t=t<sub>s</sub>+t<sub>delay</sub>) were used to predict a tumor’s position (T’[t<sub>s</sub>+t<sub>delay</sub>+t<sub>ahead</sub>]) at a specific time (t=t<sub>s</sub>+t<sub>delay</sub>+t<sub>ahead</sub>). This process was also repeated to predict the next tumor position continuously. The external signals that were recorded during radiation therapy (ie, the time between t=t<sub>end</sub>−t<sub>delay</sub>−t<sub>ahead</sub>+1 and t=t<sub>end</sub>−t<sub>ahead</sub>) were used to predict the final tumor position (T’[t<sub>end</sub>]). Finally, the predicted signals (T’[t<sub>s</sub>+t<sub>delay</sub>+t<sub>ahead</sub>],…, T’[t<sub>end</sub>]) were compared to the real tumor positions (T[t<sub>s</sub>+t<sub>delay</sub>+t<sub>ahead</sub>],…, T[t<sub>end</sub>]).</p>
      </sec>
      <sec>
        <title>LSTM Model</title>
        <p>The recurrent neural network (RNN) is a particular type of neural network that allows for self-cycle connections and transmits parameters across different time stamps. An RNN model can store the information of former time stamps. However, it is difficult for the RNN to memorize long-term memory information due to vanishing and exploding gradients [<xref ref-type="bibr" rid="ref25">25</xref>-<xref ref-type="bibr" rid="ref27">27</xref>].</p>
        <p>The LSTM layer is a special RNN layer that overcomes the weakness that the RNN has with memorizing long-term memory information [<xref ref-type="bibr" rid="ref26">26</xref>,<xref ref-type="bibr" rid="ref28">28</xref>]. <xref rid="figure2" ref-type="fig">Figure 2</xref> shows an LSTM unit. Unlike the simple RNN unit, the LSTM unit has a memory cell state <italic>c</italic><sub>t</sub> at time t. The information that passes through state <italic>c</italic><sub>t</sub> is controlled by the following three gates: the input gate (<italic>i<sub>t</sub></italic>), the forget gate (<italic>f<sub>t</sub></italic>), and the output gate (<italic>o<sub>t</sub></italic>). The input gate is used to control input data that flow into state <italic>c</italic><sub>t</sub>, the forget gate (<italic>f<sub>t</sub></italic>) is used to control the forgetting of state <italic>c</italic><sub>t</sub>, and the output gate is used to moderate the output data that flow from state <italic>c</italic><sub>t</sub>. A plurality of LSTM layers can be stacked in a deeper neural network, which can fit the data of the complicated functions that are required to analyze the inputs and the targets.</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>The structure of an LSTM layer. LSTM: long short-term memory.</p>
          </caption>
          <graphic xlink:href="jmir_v23i8e27235_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>TCN Model</title>
        <p>The TCN model was based on a transformation of a 1D fully convolutional network that was used for sequential prediction problems. The TCN model used a multilayer network to learn information over a long time span. Sequence information was transmitted layer by layer across the network until prediction results were obtained. The architecture of the TCN model is illustrated in <xref rid="figure3" ref-type="fig">Figure 3</xref> [<xref ref-type="bibr" rid="ref23">23</xref>], in which <italic>x<sub>1</sub></italic>, <italic>x<sub>2</sub></italic>,…, <italic>x<sub>T</sub></italic> are the original sequence signals (inputs), and <inline-graphic xlink:href="jmir_v23i8e27235_fig8.png" xlink:type="simple" mimetype="image"/> are the prediction signals (outputs). The obvious characteristics of the TCN model, which were compared to those of the normal 1D fully convolutional network model, were as follows:</p>
        <list list-type="bullet">
          <list-item>
            <p>The TCN model used causal convolutions, in which the output at time t was convolved only with elements from previous layers at time t and earlier, to ensure that no leakage occurred from the future into the past.</p>
          </list-item>
          <list-item>
            <p>The TCN model used dilated convolutions to ensure that each hidden layer had the same size as the input sequence and to increase the receptive field (ie, learning longer lengths of information).</p>
          </list-item>
        </list>
        <p>The input of the TCN model was interval sampled. The equation for the dilated convolution was as follows:</p>
        <p>
          <disp-formula>
            <graphic xlink:href="jmir_v23i8e27235_fig9.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </disp-formula>
        </p>
        <p>In equation 1, <italic>d</italic> is the dilation factor (sampling rate). A <italic>d</italic> value of 1 in the lowest layer meant that every signal was sampled, whereas a <italic>d</italic> value of 2 in the middle layer meant that every second respiratory signal was sampled.</p>
        <p>Residual networks [<xref ref-type="bibr" rid="ref29">29</xref>], which are shown in <xref rid="figure3" ref-type="fig">Figure 3</xref>, were imported in this study to accelerate convergence and stabilize training. A residual block that included a branch was used to make a series of transformations (<italic>F</italic>). Afterward, the outputs of the residual block (ie, <italic>F</italic>[X<sub>residual</sub>]) were added to the input (ie, <italic>X</italic><sub>residual</sub>), as follows:</p>
        <p>
          <disp-formula>O<sub>residual</sub> = Activation(X<sub>residual</sub> + F[X<sub>residual</sub>])</disp-formula>
          <bold>(2)</bold>
        </p>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>The architecture of the temporal convolutional neural network model. "d" was the dilation factor. Conv: convolution; ReLU: rectified linear unit.</p>
          </caption>
          <graphic xlink:href="jmir_v23i8e27235_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Hyperparameter Tuning</title>
        <p>With regard to the TCN model, previous TCN studies [<xref ref-type="bibr" rid="ref20">20</xref>-<xref ref-type="bibr" rid="ref23">23</xref>] reported (in the <italic>Introduction</italic> section) using the same TCN architecture and only sometimes varying the number of layers (<italic>n</italic>) and the filter size. Hence, we tested these two hyperparameters and used a dilation factor (<italic>d</italic>) of 2<italic><sup>n</sup></italic> for layer <italic>n.</italic> Moreover, the number of neurons in the input layer and the learning rate of the TCN model were also investigated in this study. For the LSTM model, the number of LSTM layers, learning rate, number of hidden units per layer, and number of neurons in the input layer were investigated. Furthermore, the Adam algorithm was used as the optimization algorithm for both the TCN model and LSTM model. The Kingma and Ba [<xref ref-type="bibr" rid="ref30">30</xref>] study demonstrated that the hyperparameters in the Adam model required little tuning. Goodfellow et al [<xref ref-type="bibr" rid="ref31">31</xref>] also approved of the robustness of the Adam model for their hyperparameter of choice and provided advice on how to tune the learning rate from the default value. Hence, we used the good default settings that were tested by Kingma and Ba [<xref ref-type="bibr" rid="ref30">30</xref>] as the hyperparameters of the Adam optimizer and tuned the learning rate. The default settings were exponential decay rates of 0.9 and 0.999 and a numerical stability constant (ε) of 10<sup>−8</sup>. In this study, all hyperparameters were tuned synthetically by using a grid search model. It should be noted that we tested the hyperparameters in a 4D hyperparameter space instead of a subspace (ie, while a parameter was investigated, others were fixed) to maintain the accuracy of hyperparameter tuning.</p>
      </sec>
      <sec>
        <title>Model Evaluation</title>
        <p>The respiratory signals from 69 treatment fractions of 21 patients with cancer who were treated with the CyberKnife Synchrony (Accuray Incorporated) device were used to evaluate the proposed model. Of the 69 treatment fractions, 5 were used to tune the hyperparameters. The remaining treatment fractions were used to evaluate prediction performance. For each of the 69 treatment fractions, signals that were acquired around the first 3 minutes (4500 data points) were used as the training signals for training the prediction model, and signals from the following 30 seconds were used as the test signals for assessing the effectiveness of the proposed model. The ahead time (t<sub>ahead</sub>) used in this study was 400 ms [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref5">5</xref>].</p>
        <p>The root mean square errors (RMSEs) between real and predicted signals of respiratory motion in a 3D space were used for assessment [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref7">7</xref>]. The RMSEs for motion in each direction (<italic>RMSE<sub>SI, LR, AP</sub></italic>) and motion in a 3D space (<italic>RMSE<sub>3D</sub></italic>) were calculated by using equations 3 and 4, respectively, as follows:</p>
        <p>
          <disp-formula>
            <graphic xlink:href="jmir_v23i8e27235_fig10.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </disp-formula>
        </p>
        <p>
          <disp-formula>
            <graphic xlink:href="jmir_v23i8e27235_fig11.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </disp-formula>
        </p>
        <p>
          <disp-formula>
            <graphic xlink:href="jmir_v23i8e27235_fig12.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </disp-formula>
        </p>
        <p>In equation 5, <inline-graphic xlink:href="jmir_v23i8e27235_fig13.png" xlink:type="simple" mimetype="image"/> is the average of the true values, and <inline-graphic xlink:href="jmir_v23i8e27235_fig14.png" xlink:type="simple" mimetype="image"/> is the average of predicted values. Time point t in equation 3 ranged from t<sub>start</sub> (<italic>t<sub>s</sub></italic>+t<italic><sub>delay</sub></italic>+t<italic><sub>ahead</sub></italic>) to t<sub>end</sub>. The Wilcoxon signed-rank test was used as the statistical model for evaluating the differences between true values and predicted values.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <p><xref ref-type="table" rid="table2">Table 2</xref> presents the RMSEs of the three models (ie, the LSTM, TCN, and no prediction models; ahead time=400 ms). Compared to the no prediction model, the RMSEs for motion in a 3D space were reduced by 46% in the LSTM model and 51% in the TCN model. For motion in all directions, the RMSEs of the TCN model were consistently lower than those of the LSTM model. The RMSE for motion in a 3D space decreased from 0.73 mm (LSTM model) to 0.67 mm (TCN model). The <italic>P</italic> value was &lt;.001, indicating that the TCN method could significantly improve prediction performance relative to the LSTM method.</p>
      <table-wrap position="float" id="table2">
        <label>Table 2</label>
        <caption>
          <p>The root mean square errors (RMSEs) of the three prediction models.</p>
        </caption>
        <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
          <col width="250"/>
          <col width="250"/>
          <col width="250"/>
          <col width="250"/>
          <thead>
            <tr valign="bottom">
              <td>Direction</td>
              <td>RMSEs (mm) of the LSTM<sup>a</sup> model</td>
              <td>RMSEs (mm) of the TCN<sup>b</sup> model</td>
              <td>RMSEs (mm) of the no prediction model</td>
            </tr>
          </thead>
          <tbody>
            <tr valign="top">
              <td>Anterior-posterior direction</td>
              <td>0.29</td>
              <td>0.28</td>
              <td>0.50</td>
            </tr>
            <tr valign="top">
              <td>Left-right direction</td>
              <td>0.27</td>
              <td>0.25</td>
              <td>0.45</td>
            </tr>
            <tr valign="top">
              <td>Superior-inferior direction</td>
              <td>0.55</td>
              <td>0.49</td>
              <td>1.04</td>
            </tr>
            <tr valign="top">
              <td>3D space</td>
              <td>0.73</td>
              <td>0.67</td>
              <td>1.36</td>
            </tr>
          </tbody>
        </table>
        <table-wrap-foot>
          <fn id="table2fn1">
            <p><sup>a</sup>LSTM: long short-term memory.</p>
          </fn>
          <fn id="table2fn2">
            <p><sup>b</sup>TCN: temporal convolutional neural network.</p>
          </fn>
        </table-wrap-foot>
      </table-wrap>
      <p><xref rid="figure4" ref-type="fig">Figure 4</xref> shows the RMSEs for motion in all directions with different ahead times. Obviously, the prediction performance of the TCN model was better than that of the LSTM model for all ahead times. Further, the prediction performance of both models worsened as ahead times increased.</p>
      <p><xref rid="figure5" ref-type="fig">Figure 5</xref> illustrates the performance comparison between the TCN and LSTM methods for predicting motion in the superior-inferior direction, anterior-posterior direction, and left-right direction. Obviously, the TCN method was more accurate and robust than the LSTM method.</p>
      <p>We investigated the hyperparameters in the 4D hyperparameter space (625 experiments) for both the TCN and LSTM models by using the grid search method among 5 treatment fractions, which were selected randomly. The options and results of hyperparameter tuning are depicted in <xref ref-type="table" rid="table3">Table 3</xref>.</p>
      <fig id="figure4" position="float">
        <label>Figure 4</label>
        <caption>
          <p>The RMSEs for respiratory motion in all directions. These were determined by using the LSTM and TCN models and different ahead times for each treatment fraction. AP: anterior-posterior; LR: left-right; LSTM: long short-term memory; RMSE: root mean square error; SI: superior-inferior; TCN: temporal convolutional neural network.</p>
        </caption>
        <graphic xlink:href="jmir_v23i8e27235_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
      </fig>
      <fig id="figure5" position="float">
        <label>Figure 5</label>
        <caption>
          <p>The performance comparison between the TCN and LSTM methods for predicting motion in the (A) superior-inferior direction, (B) left-right direction, and (C) anterior-posterior direction. LSTM: long short-term memory; TCN: temporal convolutional neural network.</p>
        </caption>
        <graphic xlink:href="jmir_v23i8e27235_fig5.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
      </fig>
      <table-wrap position="float" id="table3">
        <label>Table 3</label>
        <caption>
          <p>The options and results of hyperparameter tuning.</p>
        </caption>
        <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
          <col width="30"/>
          <col width="470"/>
          <col width="0"/>
          <col width="300"/>
          <col width="0"/>
          <col width="200"/>
          <thead>
            <tr valign="top">
              <td colspan="3">Models and hyperparameters</td>
              <td colspan="2">Hyperparameter options</td>
              <td>Hyperparameter selected</td>
            </tr>
          </thead>
          <tbody>
            <tr valign="top">
              <td colspan="6">
                <bold>Temporal convolutional neural network model</bold>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Number of layers</td>
              <td colspan="2">4, 5, 6, 7, and 8</td>
              <td colspan="2">5</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Filter size</td>
              <td colspan="2">1, 3, 5, 7, and 9</td>
              <td colspan="2">9</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Number of neurons in the input layer</td>
              <td colspan="2">5, 10, 15, 20, and 25</td>
              <td colspan="2">15</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Learning rate</td>
              <td colspan="2">0.0001, 0.001, 0.005, 0.01, and 0.1</td>
              <td colspan="2">0.001</td>
            </tr>
            <tr valign="top">
              <td colspan="6">
                <bold>LSTM<sup>a</sup> model</bold>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Number of LSTM layers</td>
              <td colspan="2">1, 2, 3, 4, and 5</td>
              <td colspan="2">2</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Learning rate</td>
              <td colspan="2">0.0001, 0.001, 0.005, 0.01, and 0.1</td>
              <td colspan="2">0.01</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Number of hidden units per layer</td>
              <td colspan="2">10, 50, 100, 150, 200, and 250</td>
              <td colspan="2">200</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Number of neurons in the input layer</td>
              <td colspan="2">5, 10, 15, 20, and 25</td>
              <td colspan="2">20</td>
            </tr>
          </tbody>
        </table>
        <table-wrap-foot>
          <fn id="table3fn1">
            <p><sup>a</sup>LSTM: long short-term memory.</p>
          </fn>
        </table-wrap-foot>
      </table-wrap>
      <p><xref ref-type="table" rid="table4">Table 4</xref> presents the RMSEs of the TCN model for each external marker. <xref rid="figure6" ref-type="fig">Figure 6</xref> shows the RMSEs for respiratory motion in a 3D space among each treatment fraction. The TCN model using 1 or 2 external markers was compared to the TCN model using all 3 external markers. The TCN model had the best performance in terms of predicting motion in all directions when all three external markers were used simultaneously. The average RMSEs for motion in a 3D space when using 1 marker and 2 markers were 0.72 mm and 0.68 mm, respectively. This decreased to 0.67 mm when using all three markers.</p>
      <p>As illustrated in <xref rid="figure7" ref-type="fig">Figure 7</xref>, the ablative analysis of the TCN was also conducted. We focused on two components in this study—the filter size and the residual blocks. We found that the effect of the filter size was small when the filter size was larger than 3. The <italic>P</italic> values between 4 filter size pairs—filter sizes 1 and 3, 3 and 5, 5 and 7, and 7 and 9—were &#60;.001, .11, .20, and .83, respectively. This indicated that prediction performance improved significantly only when the filter size increased from 1 to 3. Further, we found that the residual blocks contributed significantly to prediction performance, as the <italic>P</italic> value was &#60;.001.</p>
      <table-wrap position="float" id="table4">
        <label>Table 4</label>
        <caption>
          <p>The root mean square errors (RMSEs) of the temporal convolutional neural network model for each external marker (EM).</p>
        </caption>
        <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
          <col width="220"/>
          <col width="120"/>
          <col width="120"/>
          <col width="120"/>
          <col width="120"/>
          <col width="100"/>
          <col width="100"/>
          <col width="100"/>
          <thead>
            <tr valign="top">
              <td>Direction</td>
              <td>RMSEs for all EMs</td>
              <td>RMSEs for EMs 1 and 2</td>
              <td>RMSEs for EMs 1 and 3</td>
              <td>RMSEs for EMs 2 and 3</td>
              <td>RMSEs for EM 1</td>
              <td>RMSEs for EM 2</td>
              <td>RMSEs for EM 3</td>
            </tr>
          </thead>
          <tbody>
            <tr valign="top">
              <td>Anterior-posterior direction</td>
              <td>0.28</td>
              <td>0.28</td>
              <td>0.28</td>
              <td>0.28</td>
              <td>0.29</td>
              <td>0.29</td>
              <td>0.29</td>
            </tr>
            <tr valign="top">
              <td>Left-right direction</td>
              <td>0.25</td>
              <td>0.26</td>
              <td>0.26</td>
              <td>0.25</td>
              <td>0.27</td>
              <td>0.26</td>
              <td>0.26</td>
            </tr>
            <tr valign="top">
              <td>Superior-inferior direction</td>
              <td>0.49</td>
              <td>0.51</td>
              <td>0.50</td>
              <td>0.50</td>
              <td>0.52</td>
              <td>0.53</td>
              <td>0.53</td>
            </tr>
            <tr valign="top">
              <td>3D space</td>
              <td>0.67</td>
              <td>0.69</td>
              <td>0.68</td>
              <td>0.68</td>
              <td>0.71</td>
              <td>0.72</td>
              <td>0.72</td>
            </tr>
          </tbody>
        </table>
      </table-wrap>
      <fig id="figure6" position="float">
        <label>Figure 6</label>
        <caption>
          <p>A comparison of RMSEs for respiratory motion in a 3D space among each treatment fraction. A: Results of the TCN model using 1 external marker compared to those of the TCN model using all 3 external markers. B: Results of the TCN model using 2 external markers compared to those of the TCN model using all 3 external markers. RMSE: root mean square error; TCN: temporal convolutional neural network.</p>
        </caption>
        <graphic xlink:href="jmir_v23i8e27235_fig6.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
      </fig>
      <fig id="figure7" position="float">
        <label>Figure 7</label>
        <caption>
          <p>The effects of different components in the temporal convolutional neural network layer. A: Residual blocks. B: FS. FS: filter size; RMSE: root mean square error.</p>
        </caption>
        <graphic xlink:href="jmir_v23i8e27235_fig7.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
      </fig>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>A TCN model for predicting respiratory motion by using external markers’ prior signals was developed and tested in this study. The experiment demonstrated that the TCN model’s performance in predicting future respiratory signals with a 400-ms ahead time was better than that of the LSTM model.</p>
        <p>As is well known, hyperparameter settings have a large influence on the prediction performance of machine learning models. This also holds true for our TCN and LSTM models. We tuned 4 major hyperparameters for both the TCN and LSTM models. Among these hyperparameters, the number of neurons in the input layer and the learning rate were tested for both models. Having a large number of neurons in the input layer allows for the inclusion of more features in models. Obviously, useful features may increase prediction accuracy. However, redundant features may also be brought in along with the useful features. Hence, if this hyperparameter is too large, prediction performance may degrade. The best numbers of neurons in the input layer for the TCN and LSTM models in this study were 15 and 20, respectively. The learning rate was an important hyperparameter in the model optimization process. If the learning rate is too large, the model may oscillate around the global minimum value instead of achieving convergence. On the other hand, if this value is too small, the training time and the risk of overfitting increase. Learning rates of 0.001 and 0.01 were selected as the optimal hyperparameters of the TCN and LSTM models, respectively. In addition to the two abovementioned hyperparameters, the number of layers and filter sizes were also investigated for the TCN model, whereas the number of LSTM layers and number of hidden units per layer were tested for the LSTM model. With regard to the TCN model, the size of the effective window (receptive field) increased as the number of layers and filter size increased. Hence, these two hyperparameters should guarantee that the receptive field of the TCN model covers enough context for respiratory signal prediction. The optimal values for these two hyperparameters in our experiments were 5 and 9, respectively. 
With regard to the LSTM model, on one hand, a deeper LSTM model (a large number of LSTM layers) may represent a more complicated relationship among respiratory signals and improve prediction performance. On the other hand, a deeper LSTM model also has an increased risk of overfitting and a decreased convergence speed. In this study, the prediction performance results of the LSTM model were comparable when the number of LSTM layers was 2 or more. Hence, we selected 2 as the optimal number of LSTM layers. Further, the number of hidden units per layer determined the width of each LSTM layer. We also found that having a large number of hidden units per layer was helpful for establishing a more complicated prediction model, but at the same time, this increased the risk of overfitting and decreased the convergence speed.</p>
        <p>The effect that different numbers of external markers had on prediction performance was also investigated in this study. The TCN model had the best prediction performance when it used all three markers’ positions. As shown in <xref rid="figure6" ref-type="fig">Figure 6</xref>, the TCN model’s prediction performance when using 3 markers was more robust than when using 1 marker or 2 markers. For most treatment fractions, the RMSEs of the TCN model using 3 markers were slightly smaller than those obtained by using 1 marker or 2 markers. However, for some treatment fractions, such as treatment fractions 7 and 11, the RMSEs of predictions based on 1 or 2 external markers were considerably larger than those of predictions based on 3 external markers. This was probably because having more external markers for different skin surface positions resulted in the inclusion of more useful features. Such useful features may alleviate the overfitting and underfitting problems.</p>
        <p>Finally, we studied the influence of the different components (the filter size and residual blocks) in the TCN model. The size of the effective window (receptive field) increased with filter size. Hence, the model’s prediction performance initially became better as the filter size increased. However, the model’s prediction performance only slightly improved as the filter size continued to increase. This may be because the receptive field that resulted from using a filter size of 3 provided enough context for the respiratory signal prediction task. On the other hand, we observed that the residual block architecture enhanced the model’s prediction performance immensely. We believe that this was because the residual blocks effectively allowed the layers of the TCN model to learn modifications to the identity mapping instead of a full transformation, which was crucial for the deep neural network architecture.</p>
      </sec>
      <sec>
        <title>Conclusion</title>
        <p>A deep learning approach based on the TCN architecture was developed to predict internal tumor positions with a 400-ms ahead time based on the external markers’ positions in this study. The results demonstrated that this model could predict tumor positions accurately. Further, the prediction performance of the TCN model using all 3 external markers was more robust and accurate than that of the TCN model using 1 or 2 external markers.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group/>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">LSTM</term>
          <def>
            <p>long short-term memory</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">RMSE</term>
          <def>
            <p>root mean square error</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">RNN</term>
          <def>
            <p>recurrent neural network</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">TCN</term>
          <def>
            <p>temporal convolutional neural network</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>This work was partially supported by the National Natural Science Foundation of China (grant 62103366), the General Project of Chongqing Natural Science Foundation (grant cstc2020jcyj-msxm2928), the Seed Grant of the First Affiliated Hospital of Chongqing Medical University (grant PYJJ2019-208), the Chongqing Municipal Bureau of Human Resources and Social Security Fund (grant cx2018147), and the Medical Research Key Project of Jiangsu Health Commission (grant ZDB 2020022).</p>
    </ack>
    <fn-group>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ren</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Nishioka</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Shirato</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Berbeco</surname>
              <given-names>RI</given-names>
            </name>
          </person-group>
          <article-title>Adaptive prediction of respiratory motion for motion compensation radiotherapy</article-title>
          <source>Phys Med Biol</source>
          <year>2007</year>
          <month>11</month>
          <day>21</day>
          <volume>52</volume>
          <issue>22</issue>
          <fpage>6651</fpage>
          <lpage>6661</lpage>
          <pub-id pub-id-type="doi">10.1088/0031-9155/52/22/007</pub-id>
          <pub-id pub-id-type="medline">17975289</pub-id>
          <pub-id pub-id-type="pii">S0031-9155(07)49903-1</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>McCall</surname>
              <given-names>KC</given-names>
            </name>
            <name name-style="western">
              <surname>Jeraj</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Dual-component model of respiratory motion based on the periodic autoregressive moving average (periodic ARMA) method</article-title>
          <source>Phys Med Biol</source>
          <year>2007</year>
          <month>06</month>
          <day>21</day>
          <volume>52</volume>
          <issue>12</issue>
          <fpage>3455</fpage>
          <lpage>3466</lpage>
          <pub-id pub-id-type="doi">10.1088/0031-9155/52/12/009</pub-id>
          <pub-id pub-id-type="medline">17664554</pub-id>
          <pub-id pub-id-type="pii">S0031-9155(07)41733-X</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bukhari</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Hong</surname>
              <given-names>SM</given-names>
            </name>
          </person-group>
          <article-title>Real-time prediction and gating of respiratory motion using an extended Kalman filter and Gaussian process regression</article-title>
          <source>Phys Med Biol</source>
          <year>2015</year>
          <month>01</month>
          <day>07</day>
          <volume>60</volume>
          <issue>1</issue>
          <fpage>233</fpage>
          <lpage>252</lpage>
          <pub-id pub-id-type="doi">10.1088/0031-9155/60/1/233</pub-id>
          <pub-id pub-id-type="medline">25489980</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Verma</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Langer</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Das</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Sandison</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Survey: Real-time tumor motion prediction for image-guided radiation treatment</article-title>
          <source>Comput Sci Eng</source>
          <year>2011</year>
          <month>09</month>
          <volume>13</volume>
          <issue>5</issue>
          <fpage>24</fpage>
          <lpage>35</lpage>
          <pub-id pub-id-type="doi">10.1109/mcse.2010.99</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Riaz</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Shanker</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Wiersma</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Gudmundsson</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Mao</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Widrow</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Xing</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Predicting respiratory tumor motion with multi-dimensional adaptive filters and support vector regression</article-title>
          <source>Phys Med Biol</source>
          <year>2009</year>
          <month>10</month>
          <day>07</day>
          <volume>54</volume>
          <issue>19</issue>
          <fpage>5735</fpage>
          <lpage>5748</lpage>
          <pub-id pub-id-type="doi">10.1088/0031-9155/54/19/005</pub-id>
          <pub-id pub-id-type="medline">19729711</pub-id>
          <pub-id pub-id-type="pii">S0031-9155(09)14759-0</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yan</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Yin</surname>
              <given-names>FF</given-names>
            </name>
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>GP</given-names>
            </name>
            <name name-style="western">
              <surname>Ajlouni</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>JH</given-names>
            </name>
          </person-group>
          <article-title>The correlation evaluation of a tumor tracking system using multiple external markers</article-title>
          <source>Med Phys</source>
          <year>2006</year>
          <month>11</month>
          <volume>33</volume>
          <issue>11</issue>
          <fpage>4073</fpage>
          <lpage>4084</lpage>
          <pub-id pub-id-type="doi">10.1118/1.2358830</pub-id>
          <pub-id pub-id-type="medline">17153387</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>WZ</given-names>
            </name>
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>MY</given-names>
            </name>
            <name name-style="western">
              <surname>Ren</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Dang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>You</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Yin</surname>
              <given-names>FF</given-names>
            </name>
          </person-group>
          <article-title>Respiratory signal prediction based on adaptive boosting and multi-layer perceptron neural network</article-title>
          <source>Phys Med Biol</source>
          <year>2017</year>
          <month>08</month>
          <day>03</day>
          <volume>62</volume>
          <issue>17</issue>
          <fpage>6822</fpage>
          <lpage>6835</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/28665297"/>
          </comment>
          <pub-id pub-id-type="doi">10.1088/1361-6560/aa7cd4</pub-id>
          <pub-id pub-id-type="medline">28665297</pub-id>
          <pub-id pub-id-type="pmcid">PMC5555420</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ernst</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Schlaefer</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Schweikard</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Predicting the outcome of respiratory motion prediction</article-title>
          <source>Med Phys</source>
          <year>2011</year>
          <month>10</month>
          <volume>38</volume>
          <issue>10</issue>
          <fpage>5569</fpage>
          <lpage>5581</lpage>
          <pub-id pub-id-type="doi">10.1118/1.3633907</pub-id>
          <pub-id pub-id-type="medline">21992375</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Torshabi</surname>
              <given-names>AE</given-names>
            </name>
            <name name-style="western">
              <surname>Riboldi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Fooladi</surname>
              <given-names>AAI</given-names>
            </name>
            <name name-style="western">
              <surname>Mosalla</surname>
              <given-names>SMM</given-names>
            </name>
            <name name-style="western">
              <surname>Baroni</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>An adaptive fuzzy prediction model for real time tumor tracking in radiotherapy via external surrogates</article-title>
          <source>J Appl Clin Med Phys</source>
          <year>2013</year>
          <month>01</month>
          <day>07</day>
          <volume>14</volume>
          <issue>1</issue>
          <fpage>4008</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/23318386"/>
          </comment>
          <pub-id pub-id-type="doi">10.1120/jacmp.v14i1.4008</pub-id>
          <pub-id pub-id-type="medline">23318386</pub-id>
          <pub-id pub-id-type="pmcid">PMC5713918</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Vergalasova</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Cai</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Yin</surname>
              <given-names>FF</given-names>
            </name>
          </person-group>
          <article-title>A novel technique for markerless, self-sorted 4D-CBCT: feasibility study</article-title>
          <source>Med Phys</source>
          <year>2012</year>
          <month>03</month>
          <volume>39</volume>
          <issue>3</issue>
          <fpage>1442</fpage>
          <lpage>1451</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/22380377"/>
          </comment>
          <pub-id pub-id-type="doi">10.1118/1.3685443</pub-id>
          <pub-id pub-id-type="medline">22380377</pub-id>
          <pub-id pub-id-type="pmcid">PMC3298564</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Berbeco</surname>
              <given-names>RI</given-names>
            </name>
            <name name-style="western">
              <surname>Nishioka</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Shirato</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>SB</given-names>
            </name>
          </person-group>
          <article-title>Residual motion of lung tumors in end-of-inhale respiratory gated radiotherapy based on external surrogates</article-title>
          <source>Med Phys</source>
          <year>2006</year>
          <month>11</month>
          <volume>33</volume>
          <issue>11</issue>
          <fpage>4149</fpage>
          <lpage>4156</lpage>
          <pub-id pub-id-type="doi">10.1118/1.2358197</pub-id>
          <pub-id pub-id-type="medline">17153393</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Shirato</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Suzuki</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Sharp</surname>
              <given-names>GC</given-names>
            </name>
            <name name-style="western">
              <surname>Fujita</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Onimaru</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Fujino</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Kato</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Osaka</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Kinoshita</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Taguchi</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Onodera</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Miyasaka</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Speed and amplitude of lung tumor motion precisely detected in four-dimensional setup and in real-time tumor-tracking radiotherapy</article-title>
          <source>Int J Radiat Oncol Biol Phys</source>
          <year>2006</year>
          <month>03</month>
          <day>15</day>
          <volume>64</volume>
          <issue>4</issue>
          <fpage>1229</fpage>
          <lpage>1236</lpage>
          <pub-id pub-id-type="doi">10.1016/j.ijrobp.2005.11.016</pub-id>
          <pub-id pub-id-type="medline">16504762</pub-id>
          <pub-id pub-id-type="pii">S0360-3016(05)02973-1</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Goodband</surname>
              <given-names>JH</given-names>
            </name>
            <name name-style="western">
              <surname>Haas</surname>
              <given-names>OCL</given-names>
            </name>
            <name name-style="western">
              <surname>Mills</surname>
              <given-names>JA</given-names>
            </name>
          </person-group>
          <article-title>A comparison of neural network approaches for on-line prediction in IGRT</article-title>
          <source>Med Phys</source>
          <year>2008</year>
          <month>03</month>
          <volume>35</volume>
          <issue>3</issue>
          <fpage>1113</fpage>
          <lpage>1122</lpage>
          <pub-id pub-id-type="doi">10.1118/1.2836416</pub-id>
          <pub-id pub-id-type="medline">18404946</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nelson</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Starkschall</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Balter</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Fitzpatrick</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Antolak</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Tolani</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Prado</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Respiration-correlated treatment delivery using feedback-guided breath hold: a technical study</article-title>
          <source>Med Phys</source>
          <year>2005</year>
          <month>01</month>
          <volume>32</volume>
          <issue>1</issue>
          <fpage>175</fpage>
          <lpage>181</lpage>
          <pub-id pub-id-type="doi">10.1118/1.1836332</pub-id>
          <pub-id pub-id-type="medline">15719968</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hansen</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Ravkilde</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Worm</surname>
              <given-names>ES</given-names>
            </name>
            <name name-style="western">
              <surname>Toftegaard</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Grau</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Macek</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Poulsen</surname>
              <given-names>PR</given-names>
            </name>
          </person-group>
          <article-title>Electromagnetic guided couch and multileaf collimator tracking on a TrueBeam accelerator</article-title>
          <source>Med Phys</source>
          <year>2016</year>
          <month>05</month>
          <volume>43</volume>
          <issue>5</issue>
          <fpage>2387</fpage>
          <pub-id pub-id-type="doi">10.1118/1.4946815</pub-id>
          <pub-id pub-id-type="medline">27147350</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sharp</surname>
              <given-names>GC</given-names>
            </name>
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>SB</given-names>
            </name>
            <name name-style="western">
              <surname>Shimizu</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Shirato</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Prediction of respiratory tumour motion for real-time image-guided radiotherapy</article-title>
          <source>Phys Med Biol</source>
          <year>2004</year>
          <month>02</month>
          <day>07</day>
          <volume>49</volume>
          <issue>3</issue>
          <fpage>425</fpage>
          <lpage>440</lpage>
          <pub-id pub-id-type="doi">10.1088/0031-9155/49/3/006</pub-id>
          <pub-id pub-id-type="medline">15012011</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ma</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Tao</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Long short-term memory neural network for traffic speed prediction using remote microwave sensor data</article-title>
          <source>Transp Res Part C Emerg Technol</source>
          <year>2015</year>
          <month>05</month>
          <volume>54</volume>
          <fpage>187</fpage>
          <lpage>197</lpage>
          <pub-id pub-id-type="doi">10.1016/j.trc.2015.03.014</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bao</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Yue</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Rao</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>A deep learning framework for financial time series using stacked autoencoders and long-short term memory</article-title>
          <source>PLoS One</source>
          <year>2017</year>
          <month>07</month>
          <day>14</day>
          <volume>12</volume>
          <issue>7</issue>
          <fpage>e0180944</fpage>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0180944</pub-id>
          <pub-id pub-id-type="medline">28708865</pub-id>
          <pub-id pub-id-type="pii">PONE-D-16-50177</pub-id>
          <pub-id pub-id-type="pmcid">PMC5510866</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Shi</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Chan</surname>
              <given-names>MF</given-names>
            </name>
            <name name-style="western">
              <surname>Tang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Ji</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>Towards real-time respiratory motion prediction based on long short-term memory neural networks</article-title>
          <source>Phys Med Biol</source>
          <year>2019</year>
          <month>04</month>
          <day>10</day>
          <volume>64</volume>
          <issue>8</issue>
          <fpage>085010</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/30917344"/>
          </comment>
          <pub-id pub-id-type="doi">10.1088/1361-6560/ab13fa</pub-id>
          <pub-id pub-id-type="medline">30917344</pub-id>
          <pub-id pub-id-type="pmcid">PMC6547821</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dauphin</surname>
              <given-names>YN</given-names>
            </name>
            <name name-style="western">
              <surname>Fan</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Auli</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Grangier</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Language modeling with gated convolutional networks</article-title>
          <year>2017</year>
          <month>08</month>
          <conf-name>The 34th International Conference on Machine Learning</conf-name>
          <conf-date>August 6-11, 2017</conf-date>
          <conf-loc>Sydney, Australia</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gehring</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Auli</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Grangier</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Yarats</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Dauphin</surname>
              <given-names>YN</given-names>
            </name>
          </person-group>
          <article-title>Convolutional sequence to sequence learning</article-title>
          <year>2017</year>
          <month>08</month>
          <conf-name>The 34th International Conference on Machine Learning</conf-name>
          <conf-date>August 6-11, 2017</conf-date>
          <conf-loc>Sydney, Australia</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kalchbrenner</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Espeholt</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Simonyan</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>van den Oord</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Graves</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Kavukcuoglu</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Neural machine translation in linear time</article-title>
          <source>arXiv.</source>
          <comment>Preprint posted online on October 31, 2016
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/pdf/1610.10099v1.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bai</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kolter</surname>
              <given-names>JZ</given-names>
            </name>
            <name name-style="western">
              <surname>Koltun</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>An empirical evaluation of generic convolutional and recurrent networks for sequence modeling</article-title>
          <source>arXiv.</source>
          <comment>Preprint posted online on April 19, 2018
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/pdf/1803.01271.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Suh</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Dieterich</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Cho</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Keall</surname>
              <given-names>PJ</given-names>
            </name>
          </person-group>
          <article-title>An analysis of thoracic and abdominal tumour motion for stereotactic body radiotherapy patients</article-title>
          <source>Phys Med Biol</source>
          <year>2008</year>
          <month>07</month>
          <day>07</day>
          <volume>53</volume>
          <issue>13</issue>
          <fpage>3623</fpage>
          <lpage>3640</lpage>
          <pub-id pub-id-type="doi">10.1088/0031-9155/53/13/016</pub-id>
          <pub-id pub-id-type="medline">18560046</pub-id>
          <pub-id pub-id-type="pii">S0031-9155(08)69101-0</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bengio</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Simard</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Frasconi</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Learning long-term dependencies with gradient descent is difficult</article-title>
          <source>IEEE Trans Neural Netw</source>
          <year>1994</year>
          <volume>5</volume>
          <issue>2</issue>
          <fpage>157</fpage>
          <lpage>166</lpage>
          <pub-id pub-id-type="doi">10.1109/72.279181</pub-id>
          <pub-id pub-id-type="medline">18267787</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pham</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Tran</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Phung</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Venkatesh</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Predicting healthcare trajectories from medical records: A deep learning approach</article-title>
          <source>J Biomed Inform</source>
          <year>2017</year>
          <month>05</month>
          <volume>69</volume>
          <fpage>218</fpage>
          <lpage>229</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(17)30071-0"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2017.04.001</pub-id>
          <pub-id pub-id-type="medline">28410981</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(17)30071-0</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pascanu</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Mikolov</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Bengio</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>On the difficulty of training recurrent neural networks</article-title>
          <year>2013</year>
          <month>06</month>
          <day>16</day>
          <conf-name>The 30th International Conference on Machine Learning (ICML)</conf-name>
          <conf-date>June 16-21, 2013</conf-date>
          <conf-loc>Atlanta, USA</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hochreiter</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Schmidhuber</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Long short-term memory</article-title>
          <source>Neural Comput</source>
          <year>1997</year>
          <month>11</month>
          <day>15</day>
          <volume>9</volume>
          <issue>8</issue>
          <fpage>1735</fpage>
          <lpage>1780</lpage>
          <pub-id pub-id-type="doi">10.1162/neco.1997.9.8.1735</pub-id>
          <pub-id pub-id-type="medline">9377276</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>He</surname>
              <given-names>KM</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>XY</given-names>
            </name>
            <name name-style="western">
              <surname>Ren</surname>
              <given-names>SQ</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Deep residual learning for image recognition</article-title>
          <year>2016</year>
          <conf-name>2016 IEEE Conference on Computer Vision and Pattern Recognition (CVPR)</conf-name>
          <conf-date>June 27-30, 2016</conf-date>
          <conf-loc>Las Vegas, Nevada, USA</conf-loc>
          <pub-id pub-id-type="doi">10.1109/cvpr.2016.90</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kingma</surname>
              <given-names>DP</given-names>
            </name>
            <name name-style="western">
              <surname>Ba</surname>
              <given-names>JL</given-names>
            </name>
          </person-group>
          <article-title>Adam: A method for stochastic optimization</article-title>
          <source>arXiv.</source>
          <comment>Preprint posted online on December 22, 2014
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/pdf/1412.6980v1.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Goodfellow</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Bengio</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Courville</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <source>Deep Learning</source>
          <year>2016</year>
          <month>11</month>
          <publisher-loc>Cambridge, Massachusetts</publisher-loc>
          <publisher-name>MIT Press</publisher-name>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
