<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="review-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMIR</journal-id>
      <journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id>
      <journal-title>Journal of Medical Internet Research</journal-title>
      <issn pub-type="epub">1438-8871</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v22i7e18477</article-id>
      <article-id pub-id-type="pmid">32706670</article-id>
      <article-id pub-id-type="doi">10.2196/18477</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Review</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Review</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Reinforcement Learning for Clinical Decision Support in Critical Care: Comprehensive Review</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Eysenbach</surname>
            <given-names>Gunther</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Hallawa</surname>
            <given-names>Ahmed</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Zhang</surname>
            <given-names>Zhongheng</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Maslove</surname>
            <given-names>David</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author">
          <name name-style="western">
            <surname>Liu</surname>
            <given-names>Siqi</given-names>
          </name>
          <degrees>BEng</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-0605-2958</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>See</surname>
            <given-names>Kay Choong</given-names>
          </name>
          <degrees>MBBS</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-2528-7282</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Ngiam</surname>
            <given-names>Kee Yuan</given-names>
          </name>
          <degrees>MBBS, MRCS, MMed, FRCS</degrees>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-5676-2520</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Celi</surname>
            <given-names>Leo Anthony</given-names>
          </name>
          <degrees>MD, MS, MPH</degrees>
          <xref rid="aff5" ref-type="aff">5</xref>
          <xref rid="aff6" ref-type="aff">6</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-6712-6626</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Sun</surname>
            <given-names>Xingzhi</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff7" ref-type="aff">7</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-6519-0197</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Feng</surname>
            <given-names>Mengling</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <address>
            <institution>Saw Swee Hock School of Public Health</institution>
            <institution>National University of Singapore</institution>
            <addr-line>12 Science Drive 2, #10-01</addr-line>
            <addr-line>Singapore, 117549</addr-line>
            <country>Singapore</country>
            <phone>65 65164988</phone>
            <email>ephfm@nus.edu.sg</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-5338-6248</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>NUS Graduate School for Integrative Science and Engineering</institution>
        <institution>National University of Singapore</institution>
        <addr-line>Singapore</addr-line>
        <country>Singapore</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Saw Swee Hock School of Public Health</institution>
        <institution>National University of Singapore</institution>
        <addr-line>Singapore</addr-line>
        <country>Singapore</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Division of Respiratory &#38; Critical Care Medicine</institution>
        <institution>National University Hospital</institution>
        <addr-line>Singapore</addr-line>
        <country>Singapore</country>
      </aff>
      <aff id="aff4">
        <label>4</label>
        <institution>Group Chief Technology Office</institution>
        <institution>National University Health System</institution>
        <addr-line>Singapore</addr-line>
        <country>Singapore</country>
      </aff>
      <aff id="aff5">
        <label>5</label>
        <institution>Institute for Medical Engineering and Science</institution>
        <institution>Massachusetts Institute of Technology</institution>
        <addr-line>Cambridge, MA</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff6">
        <label>6</label>
        <institution>Division of Pulmonary, Critical Care and Sleep Medicine</institution>
        <institution>Beth Israel Deaconess Medical Center</institution>
        <addr-line>Boston, MA</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff7">
        <label>7</label>
        <institution>Ping An Health Technology</institution>
        <addr-line>Beijing</addr-line>
        <country>China</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Mengling Feng <email>ephfm@nus.edu.sg</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>7</month>
        <year>2020</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>20</day>
        <month>7</month>
        <year>2020</year>
      </pub-date>
      <volume>22</volume>
      <issue>7</issue>
      <elocation-id>e18477</elocation-id>
      <history>
        <date date-type="received">
          <day>28</day>
          <month>2</month>
          <year>2020</year>
        </date>
        <date date-type="rev-request">
          <day>8</day>
          <month>4</month>
          <year>2020</year>
        </date>
        <date date-type="rev-recd">
          <day>5</day>
          <month>5</month>
          <year>2020</year>
        </date>
        <date date-type="accepted">
          <day>13</day>
          <month>5</month>
          <year>2020</year>
        </date>
      </history>
      <copyright-statement>©Siqi Liu, Kay Choong See, Kee Yuan Ngiam, Leo Anthony Celi, Xingzhi Sun, Mengling Feng. Originally published in the Journal of Medical Internet Research (http://www.jmir.org), 20.07.2020.</copyright-statement>
      <copyright-year>2020</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research, is properly cited. The complete bibliographic information, a link to the original publication on http://www.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://www.jmir.org/2020/7/e18477" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Decision support systems based on reinforcement learning (RL) have been implemented to facilitate the delivery of personalized care. This paper aimed to provide a comprehensive review of RL applications in the critical care setting.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>This review aimed to survey the literature on RL applications for clinical decision support in critical care and to provide insight into the challenges of applying various RL models.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>We performed an extensive search of the following databases: PubMed, Google Scholar, Institute of Electrical and Electronics Engineers (IEEE), ScienceDirect, Web of Science, Medical Literature Analysis and Retrieval System Online (MEDLINE), and Excerpta Medica Database (EMBASE). Studies published over the past 10 years (2010-2019) that have applied RL for critical care were included.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>We included 21 papers and found that RL has been used to optimize the choice of medications, drug dosing, and timing of interventions and to target personalized laboratory values. We further compared and contrasted the design of the RL models and the evaluation metrics for each application.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>RL has great potential for enhancing decision making in critical care. Challenges regarding RL system design, evaluation metrics, and model choice exist. More importantly, further work is required to validate RL in authentic clinical environments.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>artificial intelligence</kwd>
        <kwd>reinforcement learning</kwd>
        <kwd>critical care</kwd>
        <kwd>decision support systems, clinical</kwd>
        <kwd>intensive care unit</kwd>
        <kwd>machine learning</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <sec>
        <title>Background</title>
        <p>In the health care domain, clinical processes are dynamic because of the high prevalence of complex diseases and dynamic changes in the clinical conditions of patients. Existing treatment recommendation systems are mainly implemented using rule-based protocols defined by physicians based on evidence-based clinical guidelines or best practices [<xref ref-type="bibr" rid="ref1">1</xref>-<xref ref-type="bibr" rid="ref3">3</xref>]. In addition, these protocols and guidelines may not consider multiple comorbid conditions [<xref ref-type="bibr" rid="ref4">4</xref>]. In an intensive care unit (ICU), critically ill patients may benefit from deviation from established treatment protocols and from personalizing patient care using means not based on rules [<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref6">6</xref>].</p>
        <p>When physicians need to adapt treatment for individual patients, they may take reference from randomized controlled trials (RCTs), systematic reviews, and meta-analyses. However, RCTs may not be available or definitive for many ICU conditions. Many patients admitted to ICUs might also be too ill for inclusion in clinical trials [<xref ref-type="bibr" rid="ref6">6</xref>]. Furthermore, only 9% of treatment recommendations in the ICU are based on RCTs [<xref ref-type="bibr" rid="ref7">7</xref>], and the vast majority of RCTs in critical care have negative findings [<xref ref-type="bibr" rid="ref8">8</xref>]. To aid clinical decisions in ICUs, we need other methods, including the use of large observational data sets. ICU data can be useful for learning about patients as they were collected in a data-rich environment. A large amount of data can then be fed into artificial intelligence (AI) systems (using computers to mimic human cognitive functions) and machine learning methods (using computer algorithms to perform clinical tasks without the need for explicit instructions). AI and machine learning can then help with diagnosis [<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref10">10</xref>], treatment [<xref ref-type="bibr" rid="ref11">11</xref>,<xref ref-type="bibr" rid="ref12">12</xref>], and resource management [<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref14">14</xref>] in the ICU. Given the dynamic nature of critically ill patients, one machine learning method called reinforcement learning (RL) is particularly suitable for ICU settings.</p>
      </sec>
      <sec>
        <title>Fundamentals of Reinforcement Learning</title>
        <p>RL is a goal-oriented learning tool where a computer <italic>agent</italic>, acting as a decision maker, analyzes available data within its defined environment [<xref ref-type="bibr" rid="ref15">15</xref>], derives a rule for taking actions, and optimizes long-term rewards. The agent is the RL model that we wish to develop. In general, an RL agent receives evaluative feedback about the performance of its action in each time step, allowing it to improve the performance of subsequent actions by trial and error [<xref ref-type="bibr" rid="ref16">16</xref>]. Mathematically, this sequential decision-making process is called the Markov decision process (MDP) [<xref ref-type="bibr" rid="ref17">17</xref>]. An MDP is defined by 4 major components: (1) a state that represents the environment at each time; (2) an action the agent takes at each time that influences the next state; (3) a transition probability that provides an estimate for reaching different subsequent states, which reflects the environment for an agent to interact with; and (4) a reward function that provides the observed feedback given a state-action pair. The solution of the MDP is an optimized set of rules and is termed the policy.</p>
        <p>RL has already emerged as an effective tool to solve complicated control problems with large-scale, high-dimensional data in some application domains, including video games, board games, and autonomous control [<xref ref-type="bibr" rid="ref18">18</xref>-<xref ref-type="bibr" rid="ref20">20</xref>]. In these domains, RL has been proven to achieve human-level capacity for learning complex sequential decisions. For instance, Alpha Go is an RL agent for playing the strategy board game Go. On the basis of Alpha Go’s learned policy, and given the current position of the Go stones, it is possible to decide where the next white/black stone should be placed on the board to maximize its chance of winning.</p>
      </sec>
      <sec>
        <title>Analogies to Critical Care</title>
        <p>For critical care, given the large amount and granular nature of recorded data, RL is well suited for providing sequential treatment suggestions, optimizing treatments, and improving outcomes for new ICU patients. RL also has the potential to expand our understanding of existing clinical protocols by automatically exploring various treatment options. The RL agent analyzes the patient trajectories, and through trial and error, derives a policy, a personalized treatment protocol that optimizes the probability of favorable clinical outcomes (eg, survival). As this computerized process is an attempt to mimic the human clinician’s thought process, RL has also been called the AI clinician [<xref ref-type="bibr" rid="ref21">21</xref>].</p>
        <p>We can consider the state as the well-being/condition of a patient. The state of the patients could depend on static traits (eg, patient demographics including age, gender, ethnicity, pre-existing comorbidity) and longitudinal measurements (eg, vital signs, laboratory test results). An action is a treatment or an intervention that physicians do for patients (eg, prescription of medications and ordering of laboratory tests). The transition probability is the likelihood of state transitions, and it is viewed as a prognosis. If the well-being in the new state is improved, we assign a reward to the RL agent, but we penalize the agent if the patient's condition worsens or stays stagnant after the intervention.</p>
        <p>As illustrated in <xref rid="figure1" ref-type="fig">Figure 1</xref>, if we take a snapshot of the current well-being of a patient as his/her state, the physician would provide a treatment or an intervention (an action) to the patient. This action would lead the patient to the next state depending on his/her current state and the action performed on him/her. While knowing the next state of the patient, the physician would need to take another action according to the new state. These state-action pairs would continue to roll out over time, and the resultant trajectory of state-action pairs could represent the changes in the patients’ conditions and the sequential treatment decisions that were performed by the physicians. We can define the length of the trajectory for each patient as fixed (eg, during the first 24 hours of the ICU stay) or as dynamic (eg, different patients could be discharged from the ICUs at different times).</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Illustration of reinforcement learning in critical care.</p>
          </caption>
          <graphic xlink:href="jmir_v22i7e18477_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>The main objective of the RL algorithm is to train an agent that can maximize the cumulative future reward from the state-action pairs given the patients’ state-action trajectories. When a new state is observed, the agent is able to choose and perform the action that yields the greatest long-term outcome (eg, survival). When the RL agent is well-trained, it is possible to pick the best action given the state of a patient, and we describe this process as acting according to an optimal policy.</p>
        <p>A policy is analogous to a clinical protocol. Nonetheless, a policy has advantages over a clinical protocol because it is capable of capturing more personalized details of individual patients. A policy can be represented by a table where it maps all possible states with actions. Alternatively, a policy could also be represented by a deep neural network (DNN) where given the input of a patient’s state, the DNN model outputs the highest probability of an action. An optimal policy can be trained using various RL algorithms. Some widely applied RL algorithms include the fitted-Q-iteration (FQI) [<xref ref-type="bibr" rid="ref22">22</xref>], deep Q network (DQN) [<xref ref-type="bibr" rid="ref23">23</xref>], actor-critic network [<xref ref-type="bibr" rid="ref24">24</xref>], and model-based RL [<xref ref-type="bibr" rid="ref25">25</xref>]. More technical details about various RL models have been explained [<xref ref-type="bibr" rid="ref26">26</xref>,<xref ref-type="bibr" rid="ref27">27</xref>].</p>
        <p>As RL in critical care is a relatively nascent field, we therefore aimed to review all the existing clinical applications that applied RL in the ICU setting for decision support over the past 10 years (2010-2019). Specifically, we aimed to categorize RL applications and summarize and compare different RL designs. We hope that our overview of RL applications in critical care can help reveal both the advances and gaps for future clinical development of RL. A detailed explanation of the concept of RL and its algorithms is available in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> [<xref ref-type="bibr" rid="ref28">28</xref>].</p>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Search Strategy</title>
        <p>A review of the literature was conducted using the following 7 databases: PubMed, Institute of Electrical and Electronics Engineers (IEEE), Google Scholar, Medical Literature Analysis and Retrieval System Online (MEDLINE), Excerpta Medica Database (EMBASE), ScienceDirect, and Web of Science. The search terms <italic>reinforcement learning, critical care, intensive care, intensive care units,</italic> and <italic>ICUs</italic> were combined. The search phrases listed in <xref ref-type="boxed-text" rid="box1">Textbox 1</xref> were used to identify articles in each database.</p>
        <boxed-text id="box1" position="float">
          <title>Queries used to retrieve records.</title>
          <p>EMBASE (Excerpta Medica Database)</p>
          <list list-type="bullet">
            <list-item>
              <p>#1 ‘reinforcement learning’</p>
            </list-item>
            <list-item>
              <p>#2 ‘intensive care unit’ OR ‘critical care’ OR ‘ICU’</p>
            </list-item>
            <list-item>
              <p>#1 AND #2</p>
            </list-item>
          </list>
          <p>Google Scholar</p>
          <list list-type="bullet">
            <list-item>
              <p>(conference OR journal) AND (“intensive care unit” OR “critical care” OR ICU) AND “reinforcement learning” -survey -reviews -reviewed -news</p>
            </list-item>
          </list>
          <p>IEEE (Institute of Electrical and Electronics Engineers)</p>
          <list list-type="bullet">
            <list-item>
              <p>((“Full Text Only”: “reinforcement learning”) AND “Full Text Only”: “intensive care units”) OR ((“Full Text Only”: “reinforcement learning”) AND “Full Text Only”: “critical care”)</p>
            </list-item>
          </list>
          <p>MEDLINE (Medical Literature Analysis and Retrieval System Online)</p>
          <list list-type="bullet">
            <list-item>
              <p>multifield search=reinforcement learning, critical care, intensive care</p>
            </list-item>
          </list>
          <p>PubMed</p>
          <list list-type="bullet">
            <list-item>
              <p>(“reinforcement learning”) AND ((“ICU”) OR (“critical care”) OR (“intensive care unit”) OR (“intensive care”))</p>
            </list-item>
          </list>
          <p>ScienceDirect</p>
          <list list-type="bullet">
            <list-item>
              <p>“reinforcement learning” AND (“critical care” OR “intensive care” OR “ICU”)</p>
            </list-item>
          </list>
          <p>Web of Science</p>
          <list list-type="bullet">
            <list-item>
              <p>ALL=(intensive care unit OR “critical care” OR “ICU”) AND ((ALL=(“reinforcement learning”)) AND LANGUAGE: (English))</p>
            </list-item>
          </list>
        </boxed-text>
      </sec>
      <sec>
        <title>Inclusion Criteria</title>
        <p>To be eligible for inclusion in this review, the primary requirement was that the article needed to focus on the implementation, evaluation, or use of an RL algorithm to process or analyze patient information (including simulated data) in an ICU setting. Papers published from January 1, 2010, to October 19, 2019, were selected. General review articles and articles not published in English were excluded. Only papers that discussed sufficient details on the data, method, and results were included in this review.</p>
      </sec>
      <sec>
        <title>Data Synthesis</title>
        <p>Data were manually extracted from the articles included in the review. A formal quality assessment was not conducted, as relevant reporting standards have not been established for articles on RL. Instead, we extracted the following characteristics from each study: the purpose of the study, data source, number of patients included, main method, evaluation metrics, and related outcomes. The final collection of articles was divided into categories to assist reading according to their application type in the ICUs.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Selection Process and Results Overview</title>
        <p>The selection process of this review was demonstrated using the Preferred Reporting Items for Systematic Reviews and Meta-Analyses flow diagram (<xref rid="figure2" ref-type="fig">Figure 2</xref>). From the full text of 269 distinct articles, an independent assessment for eligibility was performed by 2 authors (SL and MF). Disagreements were discussed to reach consensus. During the full-text review, 249 articles were excluded, and 21 articles were eventually included. The reasons for exclusion during the review process are outlined in <xref ref-type="table" rid="table1">Table 1</xref>.</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Preferred Reporting Items for Systematic Reviews and Meta-Analyses flow diagram of the search strategy.</p>
          </caption>
          <graphic xlink:href="jmir_v22i7e18477_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Exclusion criteria used to exclude papers.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="140"/>
            <col width="210"/>
            <col width="450"/>
            <col width="200"/>
            <thead>
              <tr valign="top">
                <td>Criterion number</td>
                <td>Exclusion criteria</td>
                <td>Justification</td>
                <td>Excluded articles, n</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>1</td>
                <td>Duplicates</td>
                <td>The papers have duplicate titles</td>
                <td>39</td>
              </tr>
              <tr valign="top">
                <td>2</td>
                <td>Not a research article</td>
                <td>The papers were blog articles, reports, comments, or views</td>
                <td>23</td>
              </tr>
              <tr valign="top">
                <td>3</td>
                <td>Not written in English</td>
                <td>The papers were not written in English</td>
                <td>6</td>
              </tr>
              <tr valign="top">
                <td>4</td>
                <td>Review</td>
                <td>The papers were review articles regarding general methods on big data, deep learning, and clinical applications</td>
                <td>12</td>
              </tr>
              <tr valign="top">
                <td>5</td>
                <td>Not applied in the field of critical care</td>
                <td>The papers did not focus on applications in critical care or intensive care</td>
                <td>92</td>
              </tr>
              <tr valign="top">
                <td>6</td>
                <td>Not using RL<sup>a</sup> as the approach in critical care</td>
                <td>The papers discussed issues in the critical care setting, but not using RL as an approach</td>
                <td>115</td>
              </tr>
              <tr valign="top">
                <td>7</td>
                <td>No clear description of the method and result</td>
                <td>The methods and results were not clearly described and thus not qualified for this review</td>
                <td>1</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>RL: reinforcement learning.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <p>In this section, we organized the reviewed articles into 4 categories, which reflect clinically relevant domains: (1) optimal individualized target laboratory value; (2) optimal choice of medication; (3) optimal timing of an intervention; and (4) optimal dosing of medication.</p>
        <p>We plotted the number of articles reviewed by their category and year of publication in <xref rid="figure3" ref-type="fig">Figure 3</xref>. We found that the majority of the papers were published in the past 3 years (n=17), indicating an increasing trend of applying RL-based approaches to assist physicians in decision making in critical care. In each of the 4 categories, we further organized the articles into subgroups based on their clinical questions (<xref rid="figure3" ref-type="fig">Figure 3</xref>). The figure shows that most of the applications used RL to find optimal drug dosing (n=16) [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref21">21</xref>,<xref ref-type="bibr" rid="ref29">29</xref>-<xref ref-type="bibr" rid="ref42">42</xref>], followed by the timing of an intervention (n=3) [<xref ref-type="bibr" rid="ref43">43</xref>-<xref ref-type="bibr" rid="ref45">45</xref>]. Only a few applications were looking at the individualized laboratory value (n=1) [<xref ref-type="bibr" rid="ref46">46</xref>] and the optimal choice of medication (n=1) [<xref ref-type="bibr" rid="ref47">47</xref>].</p>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>Mapping of reinforcement learning studies in critical care by application type.</p>
          </caption>
          <graphic xlink:href="jmir_v22i7e18477_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>Next, we discuss the details for each category with the methods and outcomes for each application. In particular, we further grouped the studies based on specific medication or treatment type in categories 3 and 4 to assist readers. A summary of all study details is found in <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>.</p>
      </sec>
      <sec>
        <title>Optimal Individualized Target Laboratory Value</title>
        <p>Even after decades of routine use of laboratory value ranges, reference standards may need to be reconsidered, especially for individual patients [<xref ref-type="bibr" rid="ref48">48</xref>]. Personalized targets for laboratory values in ICU patients could account for disease severity, comorbidities, and other patient-specific differences. Weng et al [<xref ref-type="bibr" rid="ref46">46</xref>] tried to identify individualized targeted blood glucose levels as a reference for physicians. They applied an RL-based approach, <italic>policy iteration</italic>, to learn the target glycemic range at an hourly interval for severely ill patients with sepsis using real ICU data. Their approach was tested using the Medical Information Mart for Intensive Care III (MIMIC III), a large, publicly available ICU database [<xref ref-type="bibr" rid="ref49">49</xref>]. MIMIC III contains information for hospital admissions of 43,000 patients in critical care units between 2001 and 2012, from which the authors extracted hourly data for 5565 patients with sepsis.</p>
        <p>Weng et al [<xref ref-type="bibr" rid="ref46">46</xref>] constructed their RL model as follows: First, they represented the patients’ states from 128 variables. These variables included patient demographics, comorbid conditions, vital sign changes, and laboratory value changes. They used a sparse autoencoder [<xref ref-type="bibr" rid="ref50">50</xref>] to reduce the high dimensionality of the raw features (128 dimensions) to only 32 dimensions so that the RL model could be trained more efficiently with limited observational data. Second, they chose to act upon 1 of 11 discrete ranges of serum glucose at each time step. Third, they designed the reward function so that the RL agent could recommend an hourly target glucose level to optimize long-term survival. A positive 100 was assigned to the end state if patients survived 90 days after admission, and a negative 100 was assigned if the patients died. For each state-action pair, the <italic>value</italic> of the pair was iteratively estimated using the reward from the training data.</p>
        <p>To understand how the reward value was related to mortality, the authors assigned values to discrete buckets using separate test data. In each value bucket, if the state-action pair is part of a trajectory where a patient died, a label of 1 was assigned to that bucket; otherwise, a label of 0 was assigned. After assigning all the state-action pairs from the test data with the labels in the corresponding value bucket, the mortality rate could be estimated for each value bucket. The authors plotted the estimated mortality rate with respect to the value-buckets and found an inverse relationship between them, where the highest value was associated with the lowest mortality. This result suggested that the learnt value represented the relationship between the state-action pair and mortality and that the learnt value of the state-action pairs from training data was validated on the test data.</p>
        <p>To validate the RL policy, the authors calculated the frequency of state transitions from the training data and generated new trajectories. Starting from the observed state in the test data, the RL policy would recommend an action with the highest value, and the subsequent state was estimated with the transition probability. By averaging the value for all state-action pairs in the simulated trajectory, the mortality for simulated trajectories could be estimated by mapping this value in the mortality-value plot. Compared with the actual mortality rate in the test data, the authors claimed that if physicians could control patients’ hourly blood glucose levels within the range recommended by the RL model, the estimated 90-day mortality would be lowered by 6.3% (from 31% to 24.7%).</p>
      </sec>
      <sec>
        <title>Optimal Choice of Medications</title>
        <p>Apart from some clinical decision support systems, commonly used systems such as computerized prescriber order entry and bar-coded medication administration lack personalized recommendations to optimize medication effectiveness and minimize side effects [<xref ref-type="bibr" rid="ref51">51</xref>]. Wang et al [<xref ref-type="bibr" rid="ref47">47</xref>] applied a deep learning network based on RL to exploit medication recommendations with a data-driven strategy. Their approach accounted for individual patient demographics, laboratory values, vital signs, and diagnoses from the MIMIC III database. They selected the top 1000 out of 4127 medications and the top 2000 out of 6695 diseases (represented by the International Classification of Diseases, Ninth Revision codes), which covered 85.4% of all medication records and 95.3% of all diagnosis records, respectively. To reduce the problem complexity, the authors further categorized the 1000 medications into 180 drug categories using anatomical therapeutic chemical codes and aggregated patients’ drug prescriptions into 24-hour windows.</p>
        <p>The authors defined RL action as the medication combinations from the 180 drug categories. They adopted an actor-critic RL agent that suggested a daily medication prescription set, and aimed to improve patients’ hospital survival. The details of the actor-critic RL algorithm are explained in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> [<xref ref-type="bibr" rid="ref28">28</xref>]. For each patient’s ICU day, the <italic>actor</italic> network would recommend one medication combination by considering state variables such as demographics, laboratory results, and vital signs. A reward value of positive 15 would be given to the end state if a patient survived until hospital discharge and negative 15 if the patient died. The reward was designated as 0 for all other time steps. To counterbalance the <italic>actor</italic> network, the <italic>critic</italic> network was applied to evaluate the consistency of actual physician prescriptions and the RL agent’s recommendations. The net effect of the actor-critic RL agent was to optimize the long-term outcomes of patients (hospital mortality) while minimizing deviations of RL-recommended actions from actual prescription patterns. In addition to the actor-critic network, the authors also applied long short-term memory [<xref ref-type="bibr" rid="ref52">52</xref>] to represent a patient’s current state by incorporating the long sequence of all historical states. Wang et al [<xref ref-type="bibr" rid="ref47">47</xref>] suggested that hospital mortality would be reduced by 4.4% if clinicians adhered to the RL agent’s recommendations.</p>
      </sec>
      <sec>
        <title>Optimal Timing of Intervention</title>
        <sec>
          <title>Weaning of Mechanical Ventilation</title>
          <p>Mechanical ventilation (MV) is a life-saving treatment applied in approximately a third of all critically ill patients [<xref ref-type="bibr" rid="ref53">53</xref>]. Prematurely discontinuing MV (premature weaning) and excessively prolonged MV (late weaning) are both associated with higher mortality [<xref ref-type="bibr" rid="ref54">54</xref>]. The best time to wean may be uncertain [<xref ref-type="bibr" rid="ref55">55</xref>].</p>
          <p>To optimize the timing of ventilation discontinuation, Prasad et al [<xref ref-type="bibr" rid="ref43">43</xref>] applied the RL-based FQI (the details of the FQI algorithm are explained in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> [<xref ref-type="bibr" rid="ref28">28</xref>]) on the MIMIC III database for all patients who were kept under ventilator support for more than 24 hours and extracted their records every 10 min from ICU admission to discharge. Patient states included a number of factors that could affect extubation, such as demographics, pre-existing conditions, comorbidities, and time-varying vital signs. The action for the ventilation setting was binary, that is, for each 10-min time step, the RL agent needed to decide whether the ventilation should be set on (continued MV) or off (weaned from MV). For reward design, Prasad et al [<xref ref-type="bibr" rid="ref43">43</xref>] followed an existing weaning protocol from the Hospital of the University of Pennsylvania. They assigned reward values to the RL agent at each time step according to 3 major considerations: (1) the RL agent should penalize each additional hour spent on the ventilator, (2) the RL agent should assign a positive reward value to a weaning action if the patient’s vital signs and laboratory results were steady and within normal ranges after extubation, and (3) there was no reward value for failed spontaneous breathing trial or for reintubation after the first extubation. For RL policy evaluation, the authors calculated the proportion of weaning actions from the RL policy, referencing the total number of weaning actions from the clinician’s policy at each time step, and calculated the overall consistency of weaning transitions. The recommended actions from the RL agent could match 85% of those from clinicians. 
The authors categorized the degree of consistency into 5 bins, and plotted the distribution of the number of reintubations with respect to the discrete consistency levels. Their results showed that when the consistency was high, vital sign fluctuations were fewer, laboratory results were more in-range, and reintubations were minimized.</p>
          <p>Yu et al [<xref ref-type="bibr" rid="ref45">45</xref>] studied the same clinical issue as Prasad et al [<xref ref-type="bibr" rid="ref43">43</xref>] and used the same data set, but designed a different reward function using inverse RL. The inverse RL model directly learnt reward mapping from data for each state-action pair and inferred what clinicians would wish to achieve as a reward. Similar to Prasad et al [<xref ref-type="bibr" rid="ref43">43</xref>], the RL recommendations by Yu et al [<xref ref-type="bibr" rid="ref45">45</xref>] were associated with shorter weaning times and fewer reintubations compared with clinician decision making.</p>
        </sec>
        <sec>
          <title>Timing to Order Laboratory Tests</title>
          <p>The timing of ordering a laboratory test can be challenging. Delayed testing would lead to continued uncertainty over the patient’s condition and possible late treatment [<xref ref-type="bibr" rid="ref56">56</xref>]. However, excessively early ordering of laboratory tests can cause unnecessary discomfort to the patient, increase the risk of anemia, and increase health care cost.</p>
          <p>Cheng et al [<xref ref-type="bibr" rid="ref44">44</xref>] applied the FQI method to find the optimal timing for ordering laboratory tests among patients with sepsis in the MIMIC III data set. They examined the timing of 4 types of laboratory tests: white blood cell count (WBC), creatinine, blood urea nitrogen (BUN), and lactate. They sampled the patients’ data at hourly intervals and constructed the state of a patient by considering the predictive variables of severe sepsis or acute kidney failure, including respiratory rate, heart rate, mean blood pressure, temperature, creatinine, BUN, WBC, and lactate. The missing values were predicted by a multioutput Gaussian process [<xref ref-type="bibr" rid="ref57">57</xref>,<xref ref-type="bibr" rid="ref58">58</xref>]. In their RL model, they chose to design the reward function with the combination of 4 factors: (1) a positive reward should be given only if the ordering of test was necessary, while penalizing over or under ordering; (2) the RL agent should be encouraged to order laboratory tests when there was a sudden change in laboratory results or vital signs; (3) negative reward should be given if the laboratory results were similar to the last measurements (no information gain); (4) a penalty would be added to a reward whenever a test was ordered, to reflect the testing cost. Their RL agent, compared with clinicians, was able to reduce the number of laboratory tests by 27% for lactate and 44% for WBC, while maintaining high information gain.</p>
        </sec>
      </sec>
      <sec>
        <title>Optimal Dosing of a Drug</title>
        <p>Recommendations for dosing regimens in ICU patients are often extrapolated from clinical trials in healthy volunteers or noncritically ill patients. This extrapolation assumes similar drug behavior (pharmacokinetics and pharmacodynamics) in the ICU and other patients or healthy volunteers. However, it is well known that many drugs used in critically ill patients may have alterations in pharmacokinetic and pharmacodynamic properties because of pathophysiological changes or drug interactions [<xref ref-type="bibr" rid="ref59">59</xref>]. Therefore, critically ill patients bring unique challenges in drug dosing.</p>
        <sec>
          <title>Dosing of Propofol</title>
          <p>Critically ill patients in ICUs often require sedation to facilitate various clinical procedures and to comfort patients during treatment. Propofol is a widely used sedative medication [<xref ref-type="bibr" rid="ref60">60</xref>], but titration of propofol is challenging, and both over sedation and under sedation can have adverse effects [<xref ref-type="bibr" rid="ref32">32</xref>]. Of the studies reviewed, 6 studies have focused on applying RL to determine the optimal dosage for propofol while maintaining the physiological stability of the patient. The bispectral index (BIS) was used to monitor sedation level and to determine the effect of propofol.</p>
          <p>Borera et al [<xref ref-type="bibr" rid="ref29">29</xref>] was the first to apply RL to a pharmacokinetic model [<xref ref-type="bibr" rid="ref61">61</xref>] to describe the time-dependent distribution of propofol in human surgical patients. The RL agent was a neural network aimed at optimizing the propofol dose to achieve the target BIS value. The patient’s state and state transition were modeled using a mathematical pharmacokinetic model with predefined parameters such as the concentration at half maximal effect of BIS, degree of nonlinearity of BIS, and time-lag coefficient to estimate the BIS value for simulated patients. The action was a discrete range of propofol infusion rate. The reward function was the error rate between the target BIS value and the current simulated BIS value, where a larger negative reward was given when the current simulated BIS value was further away from the predefined target value. They measured the performance of the RL agent by looking at the time to reach the target BIS value (steady time). The evaluation was conducted on 1000 simulated patients. On average, the steady time was 3.25 min for the BIS value to reach target.</p>
          <p>To ensure patient safety, propofol dosing should consider the concurrent stability of vital parameters. For instance, Padmanabhan et al [<xref ref-type="bibr" rid="ref30">30</xref>] chose mean arterial pressure (MAP) as the secondary control variable. The authors combined the error rates for both BIS and MAP when designing the reward. The target for the RL agent was to infuse propofol so that the target BIS would be reached in a short time, whereas MAP was kept within a desired range. In subsequent studies, Padmanabhan et al [<xref ref-type="bibr" rid="ref31">31</xref>,<xref ref-type="bibr" rid="ref32">32</xref>] modified their methods with different RL training algorithms (Q-learning and policy iteration). In all their studies, the RL agent was able to suggest accurate propofol doses and achieve target BIS values within a few minutes.</p>
          <p>In contrast to fixed pharmacokinetic models in the RL model environment, Yu et al [<xref ref-type="bibr" rid="ref45">45</xref>] applied FQI and Bayesian inverse RL on the MIMIC III database. They considered patients’ demographic characteristics, pre-existing conditions, comorbidities, and time-varying vital signs to construct the state of the patient. Their inverse RL model interpreted clinician preference as a reward for different patient states. The learned reward function from the inverse RL model suggested that clinicians may pay more attention to patients’ cardiorespiratory stability rather than oxygenation when making decisions about propofol dosage.</p>
        </sec>
        <sec>
          <title>Dosing of Intravenous Heparin</title>
          <p>Anticoagulant agents are often used to prevent and treat a wide range of cardiovascular diseases. Heparin is commonly used in critical care [<xref ref-type="bibr" rid="ref62">62</xref>], yet its precise dosing is complicated by a narrow therapeutic window. Overdosing of heparin results in bleeding whereas under dosing risks clotting. To guide heparin dosing, activated partial thromboplastin time (aPTT) is often used as a measure of the anticoagulant effect of heparin.</p>
          <p>Nemati et al [<xref ref-type="bibr" rid="ref6">6</xref>] applied FQI with a neural network to optimize and individualize heparin dosing. Their study was conducted on the MIMIC II database, with the reward function based on aPTT levels following heparin dosing [<xref ref-type="bibr" rid="ref63">63</xref>]. The reward to the RL agent would be high if the aPTT value was between 60 and 100 seconds. After training, they plotted the state-action value with respect to the level of consistency between the RL policy and clinician practice. Their results showed that, on average, following the recommendations of the RL agent resulted in higher state-action values.</p>
          <p>Ghassemi et al [<xref ref-type="bibr" rid="ref33">33</xref>] and Lin et al [<xref ref-type="bibr" rid="ref34">34</xref>] focused on a personalized optimal heparin dosing using different RL algorithms. In addition to the MIMIC III data set, Lin et al [<xref ref-type="bibr" rid="ref34">34</xref>] applied an actor-critic network on the Emory Healthcare data set from Emory University. For RL policy evaluation, Lin et al [<xref ref-type="bibr" rid="ref34">34</xref>] regressed the discordance between RL policy and physician practice over the number of clotting and bleeding complications, adjusting for covariates such as history of clot or bleed, weight, age, and sequential organ failure assessment score. The regression coefficient suggested that following the RL agent’s recommendations would have likely resulted in improved clinical outcomes with a reduced number of clotting and bleeding complications.</p>
        </sec>
        <sec>
          <title>Intravenous Fluids, Vasopressors, and Cytokine Therapy for Treating Sepsis</title>
          <p>Sepsis is the third leading cause of death and is expensive to treat [<xref ref-type="bibr" rid="ref64">64</xref>]. Besides antibiotics and source control, challenges remain with the use of intravenous (IV) fluids to correct hypovolemia and administration of vasopressors to counteract sepsis-induced vasodilation. Raghu et al [<xref ref-type="bibr" rid="ref36">36</xref>] suggested a data-driven RL approach to recommend personalized optimal dosage for IV fluids and vasopressors to improve hospital mortality. Their RL model was double DQN with dueling, which can minimize the overestimation problem of previous Q-learning models. The details of the Q-learning and double DQN algorithms are explained in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> [<xref ref-type="bibr" rid="ref28">28</xref>]. The authors considered patients’ demographics, laboratory values, vital signs, and intake/output events as state features in the RL model. Action was designed as a combination of 5 discrete bins for IV fluid dosing and 5 bins for vasopressor dosing to treat patients with sepsis. The reward was issued at the terminal time step of the patient’s trajectory, with a positive reward if the patient survived. Data were extracted from the MIMIC III database for all patients who fulfilled sepsis-3 criteria [<xref ref-type="bibr" rid="ref65">65</xref>]. For policy evaluation, Raghu et al [<xref ref-type="bibr" rid="ref36">36</xref>] plotted the estimated hospital mortality with respect to the difference between dosages recommended by the RL agent and by clinicians. The plot showed that the mortality was lowest when there was no discrepancy between RL policy and physician decision making. 
Six other groups of researchers also focused on the same research question and applied various RL algorithms with slightly different designs of the state space, reward function, and evaluation metrics [<xref ref-type="bibr" rid="ref21">21</xref>,<xref ref-type="bibr" rid="ref35">35</xref>,<xref ref-type="bibr" rid="ref37">37</xref>-<xref ref-type="bibr" rid="ref40">40</xref>]. The findings from these studies all suggest that the RL agent would be able to learn from the data and if physicians followed the RL policy, the estimated hospital mortality could be improved.</p>
          <p>Among the aforementioned studies, Komorowski et al [<xref ref-type="bibr" rid="ref21">21</xref>] were the pioneers of applying RL in the ICU, using data from patients with sepsis in the MIMIC III database. They inferred a patient’s health status using an array of inputs, which included demographics, vital signs, laboratory tests, illness severity scores, medications, procedures, fluid intake and output, physician notes, and diagnostic coding. Patient data were aggregated and averaged every 4 hours to represent patient states. Using a k-means algorithm, these patient states were then simplified into 750 discrete mutually exclusive clusters. A sequence of these clustered states would describe a particular patient’s trajectory. The authors estimated the state transition probability by counting how many times each transition was observed and converted the counts to a stochastic matrix. This transition matrix contained the probability for each patient going to a new state, given a previous action taken in the current state. The entire trajectory of a patient’s state can be estimated using the transition matrix. The authors applied a policy iteration RL algorithm that learnt the optimal dosing policy for IV fluids and vasopressors to maximize the probability of 90-day survival.</p>
          <p>Nevertheless, the study by Komorowski et al [<xref ref-type="bibr" rid="ref21">21</xref>] had several limitations. First, their study only considered fluid and vasopressor management, ignoring other important treatments such as source control, correction of hypovolemia, and management of secondary organ failures [<xref ref-type="bibr" rid="ref21">21</xref>]. Second, 90-day mortality is affected by factors outside of the ICU, which the study did not take into account. Third, clinical decision making considers both short-term outcomes (eg, physiological stability) and long-term outcomes (eg, kidney failure or mortality), but the study only considered mortality as the single goal for training the RL algorithm [<xref ref-type="bibr" rid="ref66">66</xref>]. Fourth, discretizing patient health status into discrete clusters loses data granularity and may limit the ability to detect changes in patient status. These limitations also occur in other studies, which we will elaborate on in the Discussion section.</p>
          <p>Other than using IV fluids and vasopressors for treating sepsis, Petersen et al [<xref ref-type="bibr" rid="ref42">42</xref>] investigated cytokine therapy using the deep deterministic policy gradient [<xref ref-type="bibr" rid="ref67">67</xref>] method. The details of the policy gradient RL algorithm are explained in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> [<xref ref-type="bibr" rid="ref28">28</xref>]. They evaluated the RL model by using an agent-based model, the innate immune response agent-based model [<xref ref-type="bibr" rid="ref68">68</xref>], that simulated the immune response to infection. The RL policy was able to achieve a very low mortality rate of 0.8% over 500 simulated patients, and suggested that personalized multicytokine treatment could be promising for patients with sepsis.</p>
        </sec>
        <sec>
          <title>Dosing of Morphine</title>
          <p>Critically ill patients may experience pain as a result of disease or certain invasive interventions. Morphine is one of the most commonly used opioids for analgesia [<xref ref-type="bibr" rid="ref69">69</xref>]. Similar to sedation, the dosing of analgesia is subject to uncertainty. Lopez-Martinez et al [<xref ref-type="bibr" rid="ref41">41</xref>] collected data for patients who had at least one pain intensity score and at least one dose of IV morphine in the MIMIC III database. They applied double DQN with dueling as their RL model and constructed the state space to be continuous with features including the patient’s self-reported pain intensity and their measured physiological status. The action was a choice of 14 discrete dosing ranges of IV morphine. The reward was determined by considering both the patients’ cardiorespiratory stability and their pain intensity. The highest reward was given when pain was absent and both heart rate and respiration rate were within the acceptable range. By comparing the RL policy with physicians’ choices, Lopez-Martinez et al [<xref ref-type="bibr" rid="ref41">41</xref>] found that RL policy tended to prescribe higher doses of morphine. This result was consistent with previous studies: continuous dosing provided similar or even better pain relief with no increase in acute adverse effects [<xref ref-type="bibr" rid="ref70">70</xref>,<xref ref-type="bibr" rid="ref71">71</xref>].</p>
        </sec>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>Our comprehensive review of the literature demonstrates that RL has the potential to be a clinical decision support tool in the ICU. As the RL algorithm is well aligned with sequential decision making in ICUs, RL consistently outperformed physicians in simulated studies. Nonetheless, challenges regarding RL system design, evaluation metrics, and model choice exist. In addition, all current applications have focused on using retrospective data sets to derive treatment algorithms and require prospective validation in authentic clinical settings.</p>
      </sec>
      <sec>
        <title>RL System Design</title>
        <p>The majority of applications were similar in their formulation of the RL system design. The state space is usually constructed by features including patient demographics, laboratory test values, and vital signs, whereas some studies applied encoding methods to represent the state of the patients instead of using raw features. The action space was very specific to each application. For instance, in terms of the dosing category, the action space would be discretized ranges of medication dosage. For other categories, such as timing of an intervention, the action space would be the binary indicator of an intervention for each time step. The number of action levels differed among the studies. For some studies, the action levels could be as many as a dozen or a hundred (eg, optimal medication combination), whereas for other studies, the action levels were limited to only 2 (eg, on/off MV). The design of the reward function is central to successful RL learning. Most of the reward functions were designed a priori with guidance from clinical practice and protocols, but 2 studies [<xref ref-type="bibr" rid="ref40">40</xref>,<xref ref-type="bibr" rid="ref45">45</xref>] managed to directly learn the reward function from the data using inverse RL.</p>
      </sec>
      <sec>
        <title>Evaluation Metrics</title>
        <p>The only metric that matters is if the adoption of an RL algorithm leads to improvement in some clinical outcomes. Most studies calculated the estimated mortality as the long-term outcome and drew plots to show the relationship between the estimated mortality versus the learnt value of patients’ state-action trajectories, where the higher value function was associated with lower mortality. The RL agent would provide treatment suggestions for those actions with higher values, thus leading to a lower estimated mortality. Estimated mortality is a popular metric for RL policy evaluation. However, the problem with the estimated mortality is that it is calculated from simulated trajectories with observational data, and may not be the actual mortality.</p>
        <p>Mortality is not always the most relevant and appropriate outcome measure. For instance, in the study by Weng et al [<xref ref-type="bibr" rid="ref46">46</xref>], they tried to identify individualized targeted blood glucose levels as a reference for physicians. In their study, 90-day mortality was used to evaluate the RL policy. However, a more relevant measure could be considered, such as short-term changes in the blood glucose level, physiological stability, and development of complications.</p>
        <p>Several studies that focused on propofol titration have considered BIS as the evaluation metric to monitor the sedation level and hence to determine the effect of propofol. Although BIS monitoring is fairly objective, assessing sedation is usually performed by health care providers with clinically validated behavioral assessment scales such as the Richmond Agitation-Sedation Scale score [<xref ref-type="bibr" rid="ref72">72</xref>]. In addition, EEG-based technologies, such as BIS and M-entropy, have been validated more in the operating room than in the ICU [<xref ref-type="bibr" rid="ref73">73</xref>]. Furthermore, BIS cannot be used as the sole monitoring parameter for sedation, as it is affected by several other factors, including the anesthetic drugs used, muscle movement, or artifacts from surgical equipment [<xref ref-type="bibr" rid="ref74">74</xref>].</p>
        <p>To date, there has been no prospective evaluation of an RL algorithm. Moreover, the observational data itself may not truly reflect the underlying condition of patients. This is known as the partially observable MDP [<xref ref-type="bibr" rid="ref75">75</xref>] problem, where we are only able to represent a patient's state by the observed physiological features, which are solved by mathematical approximation.</p>
      </sec>
      <sec>
        <title>Model Choice</title>
        <p>FQI and DQN seem to be the top RL approaches among the reviewed studies. FQI is not a deep learning–based RL model, which guarantees convergence for many commonly used regressors, including kernel-based methods and decision trees. On the other hand, DQN leverages the representational power of DNNs to learn optimal treatment recommendations, mapping the patient state-action pair to the value function. Neural networks hold an advantage over tree-based methods in iterative settings in that it is possible to simply update the network weights at each iteration, rather than rebuilding the trees entirely.</p>
        <p>Both FQI and DQN are off-policy RL models. Off-policy refers to learning about one way of behaving from the data generated by another way of selecting actions [<xref ref-type="bibr" rid="ref76">76</xref>]. For instance, an off-policy RL model tries to train a policy X to select actions in each step, but it estimates the Q-values from state-action pairs where the action was chosen by following another policy Y. In contrast to off-policy learning, on-policy learning uses the same policy X to choose actions and to evaluate the returns in each step during training. Most of the included studies adopted off-policy RL models because the RL models aim to learn policy X from the data, which was generated by following real actions of physicians (policy Y). The data generated by policy Y is the actual physicians’ policy, where the RL models try to learn and improve from. This is the fundamental idea of applying off-policy RL models.</p>
        <p>In addition, both FQI and DQN are value-based RL models that aim to learn the value functions. In value-based RL, a policy can be derived by following the action with the highest value at each time step. Another type of RL is called policy-based RL, which aims to learn the policy directly without worrying about the value function. Policy-based methods are more useful in continuous space. When the data volume is insufficient to train a DQN model, the DQN is not guaranteed to achieve a stable RL policy. As there is an infinite number of actions or states to estimate the values for, value-based RL models are too computationally expensive in the continuous space. However, policy-based RL models demand more data samples for training. Otherwise, the learned policy is not guaranteed to converge to an optimal one. Both value-based and policy-based RL models can be grouped in a more general way as <italic>model-free</italic> RL. Here the word <italic>model-free</italic> means the environment is unknown to an agent. The RL agent makes use of the trajectories generated from the environment, rather than explicitly knowing the rule or the transition probability. In contrast to model-free RL, <italic>model-based</italic> RL requires the agent to know the transition probability for all the state-action combinations explicitly and hence impractical as the state space and action space grow. In the critical care context, patients’ conditions and prognosis are very complex to apply model-based RL because we are not exactly sure about the probability of all state transitions. In addition, most studies in critical care could only use limited retrospective data to train the model offline. Therefore, we found that most of the studies have applied a value-based RL model to utilize the available observational data.</p>
      </sec>
      <sec>
        <title>Common Data Sets</title>
        <p>We found that 71% (15/21) of applications utilized the MIMIC II or MIMIC III database to conduct their experiments. We conjecture that such popularity might be due to the public availability and high quality of the MIMIC data. However, data collected from a single source may introduce potential bias to the research findings. There are inherent biases in the medical data sets obtained at various institutions due to multiple factors, including operation strategy, hospital protocol, instrument difference, and patient preference. Therefore, the RL models trained on a single data set, regardless of the data volume, cannot be confidently applied to another data set. The findings from the reviewed articles may not be generalizable to other institutions and populations. In addition to the MIMIC database, one of the studies also utilized the eICU Research Institute (eRI) database to test their RL model [<xref ref-type="bibr" rid="ref77">77</xref>]. The eRI database has a larger volume of data compared with the MIMIC database, and it is also publicly available. We suggest that future applications could cross-validate their models on both the MIMIC and eRI databases. In addition, all current applications have focused on using retrospective data sets to derive treatment algorithms and require prospective validation in authentic clinical settings.</p>
      </sec>
      <sec>
        <title>Strengths and Limitations of This Study</title>
        <p>The strengths of this paper include the comprehensive and extensive search for all available publications that applied RL as an approach in the critical care context. Nonetheless, we acknowledge the limitations. We included papers (eg, those on arXiv) that have not been peer-reviewed <italic>before</italic> publication, but these papers have undergone a postpublication peer review. According to the search phrases applied in this review, we may have missed certain papers that applied RL in critical care but did not include the phrase <italic>intensive care</italic> or <italic>ICU</italic> in their full text papers.</p>
      </sec>
      <sec>
        <title>Challenges and Future Directions</title>
        <p>A number of challenges must be overcome before RL can be implemented in a clinical setting. First, it is important to have a meaningful reward design. The RL agent would be vulnerable in case of reward misspecification, and might not be able to produce any meaningful treatment suggestion. Inverse RL can be an alternative to a priori–specified reward functions. However, inverse RL assumes that the given data represent the experts’ demonstrations and the recommendations from the data were already optimal; these may not be true.</p>
        <p>Second, medical domains present special challenges with respect to data acquisition, analysis, interpretation, and presentation of these data in a clinically relevant and usable format. Addressing the question of censoring in suboptimal historical data and explicitly correcting for the bias that arises from the timing of interventions or dosing of medication is crucial to fair evaluation of learnt policies.</p>
        <p>Third, another challenge for applying the RL model in the clinical setting is exploration. Unlike other domains such as game playing, where one can repeat the experiments as many times as needed, in the clinical setting, the RL agent has to learn from a limited set of data and intervention variations that were collected offline. Using trial and error to explore all possible scenarios may conflict with medical ethics, thereby limiting the ability of the RL agent to attempt new behaviors to discover ones with higher rewards and better long-term outcomes.</p>
        <p>In comparison with other machine learning approaches, there is an absence of acceptable performance standards in RL. This problem is not unique to RL but seems harder to address in RL compared with other machine learning approaches, such as prediction and classification algorithms, where accuracy and precision and recall are more straightforward to implement. However, it is worth noting that RL has a distinct advantage over other machine learning approaches, in that one can choose which outcome to optimize by specifying the reward function. This provides an opportunity to involve patient preferences and shared decision making. This becomes more relevant when learned policies change depending on the reward function. For example, an RL algorithm that optimizes survival may recommend a different set of treatments versus an RL algorithm that optimizes neurologic outcome. In such situations, patient preference can be elicited to guide the choice of the RL algorithm.</p>
        <p>RL has the potential to offer considerable advantages in supporting the decision making of physicians. However, certain key issues need to be addressed, such as clinical implementation, ethics, and medico-legal limitations in health care delivery [<xref ref-type="bibr" rid="ref78">78</xref>]. In fact, any machine learning model would need to address these limitations carefully to serve as truly effective tools. In clinical practice, the RL models need to be refined iteratively over time to include newly generated data from electronic health systems in hospitals, and the model must produce robust results for physicians to interpret and understand. Moreover, patients’ understanding and willingness to use the RL model as a supporting tool in their care would be another important consideration. Another important ethical consideration would be the liability in case of medical error when the RL model recommendation differs from the physician’s decision. It has an impact on the autonomy of both the physician and patient. The problem of medical error works in both ways when there is a poor outcome: (1) if the physician follows the RL model recommendation, can the clinician then blame the model and the personnel who maintain the model; (2) if the clinician does not follow the RL model recommendation, can the clinician then be said to have made the wrong decision and be penalized.</p>
        <p>Possible directions for future work include (1) modeling the RL environment as a partially observable MDP, in which observations from the data are mapped to some state space that truly represents patients’ underlying well-being; (2) extending the action space to be continuous, suggesting more precise and practical treatment recommendations to physicians; and (3) improving the interpretability of the RL models so that physicians can have more confidence in accepting the model results. With further efforts to tackle these challenges, RL methods could play a crucial role in helping to inform patient-specific decisions in critical care.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>In this comprehensive review, we synthesized data from 21 articles on the use of RL to process or analyze retrospective data from ICU patients. With the improvement of data collection and advancement in reinforcement learning technologies, we see great potential in RL-based decision support systems to optimize treatment recommendations for critical care.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Introduction to reinforcement learning.</p>
        <media xlink:href="jmir_v22i7e18477_app1.pdf" xlink:title="PDF File  (Adobe PDF File), 617 KB"/>
      </supplementary-material>
      <supplementary-material id="app2">
        <label>Multimedia Appendix 2</label>
        <p>Summary of study characteristics.</p>
        <media xlink:href="jmir_v22i7e18477_app2.pdf" xlink:title="PDF File  (Adobe PDF File), 167 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">AI</term>
          <def>
            <p>artificial intelligence</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">aPTT</term>
          <def>
            <p>activated partial thromboplastin time</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">BIS</term>
          <def>
            <p>bispectral index</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">BUN</term>
          <def>
            <p>blood urea nitrogen</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">DNN</term>
          <def>
            <p>deep neural network</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">DQN</term>
          <def>
            <p>deep Q network</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">eRI</term>
          <def>
            <p>eICU Research Institute</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">FQI</term>
          <def>
            <p>fitted Q-iteration</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">ICU</term>
          <def>
            <p>intensive care unit</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb10">IV</term>
          <def>
            <p>intravenous</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb11">MAP</term>
          <def>
            <p>mean arterial pressure</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb12">MDP</term>
          <def>
            <p>Markov decision process</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb13">MIMIC III</term>
          <def>
            <p>Medical Information Mart for Intensive Care III</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb14">MV</term>
          <def>
            <p>mechanical ventilation</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb15">RCT</term>
          <def>
            <p>randomized controlled trial</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb16">RL</term>
          <def>
            <p>reinforcement learning</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb17">WBC</term>
          <def>
            <p>white blood cell</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>SL was funded by the National University of Singapore Graduate School for Integrative Sciences and Engineering Scholarship. This research was supported by the National Research Foundation Singapore under its AI Singapore Programme (award no. AISG-GC-2019-002), the National University Health System joint grant (WBS R-608-000-199-733), and the National Medical Research Council health service research grant (HSRG-OC17nov004).</p>
    </ack>
    <fn-group>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Almirall</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Compton</surname>
              <given-names>SN</given-names>
            </name>
            <name name-style="western">
              <surname>Gunlicks-Stoessel</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Duan</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Murphy</surname>
              <given-names>SA</given-names>
            </name>
          </person-group>
          <article-title>Designing a pilot sequential multiple assignment randomized trial for developing an adaptive treatment strategy</article-title>
          <source>Stat Med</source>
          <year>2012</year>
          <month>07</month>
          <day>30</day>
          <volume>31</volume>
          <issue>17</issue>
          <fpage>1887</fpage>
          <lpage>902</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/22438190"/>
          </comment>
          <pub-id pub-id-type="doi">10.1002/sim.4512</pub-id>
          <pub-id pub-id-type="medline">22438190</pub-id>
          <pub-id pub-id-type="pmcid">PMC3399974</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Marple</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Salazar</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Gupta</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Tamil</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>A physician advisory system for chronic heart failure management based on knowledge patterns</article-title>
          <source>Theor Pract Log Prog</source>
          <year>2016</year>
          <month>10</month>
          <day>14</day>
          <volume>16</volume>
          <issue>5-6</issue>
          <fpage>604</fpage>
          <lpage>18</lpage>
          <pub-id pub-id-type="doi">10.1017/S1471068416000429</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hannes</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Leys</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Vermeire</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Aertgeerts</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Buntinx</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Depoorter</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Implementing evidence-based medicine in general practice: a focus group based study</article-title>
          <source>BMC Fam Pract</source>
          <year>2005</year>
          <month>09</month>
          <day>9</day>
          <volume>6</volume>
          <fpage>37</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcfampract.biomedcentral.com/articles/10.1186/1471-2296-6-37"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/1471-2296-6-37</pub-id>
          <pub-id pub-id-type="medline">16153300</pub-id>
          <pub-id pub-id-type="pii">1471-2296-6-37</pub-id>
          <pub-id pub-id-type="pmcid">PMC1253510</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hutchinson</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Baker</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Making use of guidelines in clinical practice</article-title>
          <source>Br Med J</source>
          <year>1999</year>
          <month>10</month>
          <day>16</day>
          <volume>319</volume>
          <issue>7216</issue>
          <fpage>1078</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/10521225"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/bmj.319.7216.1078</pub-id>
          <pub-id pub-id-type="medline">10521225</pub-id>
          <pub-id pub-id-type="pmcid">PMC1116869</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>James</surname>
              <given-names>JT</given-names>
            </name>
          </person-group>
          <article-title>A new, evidence-based estimate of patient harms associated with hospital care</article-title>
          <source>J Patient Saf</source>
          <year>2013</year>
          <month>09</month>
          <volume>9</volume>
          <issue>3</issue>
          <fpage>122</fpage>
          <lpage>8</lpage>
          <pub-id pub-id-type="doi">10.1097/PTS.0b013e3182948a69</pub-id>
          <pub-id pub-id-type="medline">23860193</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nemati</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Ghassemi</surname>
              <given-names>MM</given-names>
            </name>
            <name name-style="western">
              <surname>Clifford</surname>
              <given-names>GD</given-names>
            </name>
          </person-group>
          <article-title>Optimal medication dosing from suboptimal clinical examples: a deep reinforcement learning approach</article-title>
          <source>Conf Proc IEEE Eng Med Biol Soc</source>
          <year>2016</year>
          <month>08</month>
          <volume>2016</volume>
          <fpage>2978</fpage>
          <lpage>81</lpage>
          <pub-id pub-id-type="doi">10.1109/EMBC.2016.7591355</pub-id>
          <pub-id pub-id-type="medline">28268938</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Hong</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>Scientific evidence underlying the recommendations of critical care clinical practice guidelines: a lack of high level evidence</article-title>
          <source>Intensive Care Med</source>
          <year>2018</year>
          <month>07</month>
          <volume>44</volume>
          <issue>7</issue>
          <fpage>1189</fpage>
          <lpage>91</lpage>
          <pub-id pub-id-type="doi">10.1007/s00134-018-5142-8</pub-id>
          <pub-id pub-id-type="medline">29564478</pub-id>
          <pub-id pub-id-type="pii">10.1007/s00134-018-5142-8</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Laffey</surname>
              <given-names>JG</given-names>
            </name>
            <name name-style="western">
              <surname>Kavanagh</surname>
              <given-names>BP</given-names>
            </name>
          </person-group>
          <article-title>Negative trials in critical care: why most research is probably wrong</article-title>
          <source>Lancet Respir Med</source>
          <year>2018</year>
          <month>09</month>
          <volume>6</volume>
          <issue>9</issue>
          <fpage>659</fpage>
          <lpage>60</lpage>
          <pub-id pub-id-type="doi">10.1016/S2213-2600(18)30279-0</pub-id>
          <pub-id pub-id-type="medline">30061048</pub-id>
          <pub-id pub-id-type="pii">S2213-2600(18)30279-0</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Burke</surname>
              <given-names>AE</given-names>
            </name>
            <name name-style="western">
              <surname>Thaler</surname>
              <given-names>KM</given-names>
            </name>
            <name name-style="western">
              <surname>Geva</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Adiri</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Feasibility and acceptability of home use of a smartphone-based urine testing application among women in prenatal care</article-title>
          <source>Am J Obstet Gynecol</source>
          <year>2019</year>
          <month>11</month>
          <volume>221</volume>
          <issue>5</issue>
          <fpage>527</fpage>
          <lpage>8</lpage>
          <pub-id pub-id-type="doi">10.1016/j.ajog.2019.06.015</pub-id>
          <pub-id pub-id-type="medline">31300161</pub-id>
          <pub-id pub-id-type="pii">S0002-9378(19)30779-3</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Laserson</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Lantsman</surname>
              <given-names>CD</given-names>
            </name>
            <name name-style="western">
              <surname>Cohen-Sfady</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Tamir</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Goz</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Brestel</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Bar</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Atar</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Elnekave</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>TextRay: Mining Clinical Reports to Gain a Broad Understanding of Chest X-Rays</article-title>
          <source>International Conference on Medical Image Computing and Computer-Assisted Intervention</source>
          <year>2018</year>
          <conf-name>MICCAI'18</conf-name>
          <conf-date>September 16-20, 2018</conf-date>
          <conf-loc>Granada, Spain</conf-loc>
          <pub-id pub-id-type="doi">10.1007/978-3-030-00934-2_62</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Choi</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Bahadori</surname>
              <given-names>MT</given-names>
            </name>
            <name name-style="western">
              <surname>Schuetz</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Stewart</surname>
              <given-names>WF</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Doctor AI: predicting clinical events via recurrent neural networks</article-title>
          <source>JMLR Workshop Conf Proc</source>
          <year>2016</year>
          <month>08</month>
          <volume>56</volume>
          <fpage>301</fpage>
          <lpage>18</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/28286600"/>
          </comment>
          <pub-id pub-id-type="medline">28286600</pub-id>
          <pub-id pub-id-type="pmcid">PMC5341604</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fan</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>Automatic treatment planning based on three-dimensional dose distribution predicted from deep learning technique</article-title>
          <source>Med Phys</source>
          <year>2019</year>
          <month>01</month>
          <volume>46</volume>
          <issue>1</issue>
          <fpage>370</fpage>
          <lpage>81</lpage>
          <pub-id pub-id-type="doi">10.1002/mp.13271</pub-id>
          <pub-id pub-id-type="medline">30383300</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dagan</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Elnekave</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Barda</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Bregman-Amitai</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Bar</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Orlovsky</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Bachmat</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Balicer</surname>
              <given-names>RD</given-names>
            </name>
          </person-group>
          <article-title>Automated opportunistic osteoporotic fracture risk assessment using computed tomography scans to aid in FRAX underutilization</article-title>
          <source>Nat Med</source>
          <year>2020</year>
          <month>01</month>
          <volume>26</volume>
          <issue>1</issue>
          <fpage>77</fpage>
          <lpage>82</lpage>
          <pub-id pub-id-type="doi">10.1038/s41591-019-0720-z</pub-id>
          <pub-id pub-id-type="medline">31932801</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41591-019-0720-z</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Watanabe</surname>
              <given-names>AT</given-names>
            </name>
            <name name-style="western">
              <surname>Lim</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Vu</surname>
              <given-names>HX</given-names>
            </name>
            <name name-style="western">
              <surname>Chim</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Weise</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Bradley</surname>
              <given-names>WG</given-names>
            </name>
            <name name-style="western">
              <surname>Comstock</surname>
              <given-names>CE</given-names>
            </name>
          </person-group>
          <article-title>Improved cancer detection using artificial intelligence: a retrospective evaluation of missed cancers on mammography</article-title>
          <source>J Digit Imaging</source>
          <year>2019</year>
          <month>08</month>
          <volume>32</volume>
          <issue>4</issue>
          <fpage>625</fpage>
          <lpage>37</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/31011956"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s10278-019-00192-5</pub-id>
          <pub-id pub-id-type="medline">31011956</pub-id>
          <pub-id pub-id-type="pii">10.1007/s10278-019-00192-5</pub-id>
          <pub-id pub-id-type="pmcid">PMC6646649</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Montague</surname>
              <given-names>PR</given-names>
            </name>
          </person-group>
          <article-title>Reinforcement learning: an introduction, by Sutton, RS and Barto, AG</article-title>
          <source>Trends Cogn Sci</source>
          <year>1999</year>
          <month>09</month>
          <volume>3</volume>
          <issue>9</issue>
          <fpage>360</fpage>
          <pub-id pub-id-type="doi">10.1016/S1364-6613(99)01331-5</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kiumarsi</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Vamvoudakis</surname>
              <given-names>KG</given-names>
            </name>
            <name name-style="western">
              <surname>Modares</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Lewis</surname>
              <given-names>FL</given-names>
            </name>
          </person-group>
          <article-title>Optimal and autonomous control using reinforcement learning: a survey</article-title>
          <source>IEEE Trans Neural Netw Learn Syst</source>
          <year>2018</year>
          <month>06</month>
          <volume>29</volume>
          <issue>6</issue>
          <fpage>2042</fpage>
          <lpage>62</lpage>
          <pub-id pub-id-type="doi">10.1109/TNNLS.2017.2773458</pub-id>
          <pub-id pub-id-type="medline">29771662</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Howard</surname>
              <given-names>RA</given-names>
            </name>
          </person-group>
          <source>Dynamic Programming and Markov Processes</source>
          <year>1960</year>
          <publisher-loc>New York, USA</publisher-loc>
          <publisher-name>MIT Press and Wiley</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mnih</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Kavukcuoglu</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Silver</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Graves</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Antonoglou</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Wierstra</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Riedmiller</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Playing Atari with deep reinforcement learning</article-title>
          <source>arXiv preprint</source>
          <year>2013</year>
          <fpage>-</fpage>
          <comment>epub ahead of print(1312.5602)<ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1312.5602"/></comment>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Silver</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Maddison</surname>
              <given-names>CJ</given-names>
            </name>
            <name name-style="western">
              <surname>Guez</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Sifre</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>van den Driessche</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Schrittwieser</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Antonoglou</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Panneershelvam</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Lanctot</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Dieleman</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Grewe</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Nham</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Kalchbrenner</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Sutskever</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Lillicrap</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Leach</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Kavukcuoglu</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Graepel</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Hassabis</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Mastering the game of Go with deep neural networks and tree search</article-title>
          <source>Nature</source>
          <year>2016</year>
          <month>01</month>
          <day>28</day>
          <volume>529</volume>
          <issue>7587</issue>
          <fpage>484</fpage>
          <lpage>9</lpage>
          <pub-id pub-id-type="doi">10.1038/nature16961</pub-id>
          <pub-id pub-id-type="medline">26819042</pub-id>
          <pub-id pub-id-type="pii">nature16961</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ng</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Coates</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Diel</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Ganapathi</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Schulte</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Tse</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Berger</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Liang</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Autonomous inverted helicopter flight via reinforcement learning</article-title>
          <source>Experimental Robotics IX</source>
          <year>2006</year>
          <publisher-loc>New York, USA</publisher-loc>
          <publisher-name>Springer</publisher-name>
          <fpage>363</fpage>
          <lpage>72</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Komorowski</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Celi</surname>
              <given-names>LA</given-names>
            </name>
            <name name-style="western">
              <surname>Badawi</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Gordon</surname>
              <given-names>AC</given-names>
            </name>
            <name name-style="western">
              <surname>Faisal</surname>
              <given-names>AA</given-names>
            </name>
          </person-group>
          <article-title>The artificial intelligence clinician learns optimal treatment strategies for sepsis in intensive care</article-title>
          <source>Nat Med</source>
          <year>2018</year>
          <month>11</month>
          <volume>24</volume>
          <issue>11</issue>
          <fpage>1716</fpage>
          <lpage>20</lpage>
          <pub-id pub-id-type="doi">10.1038/s41591-018-0213-5</pub-id>
          <pub-id pub-id-type="medline">30349085</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41591-018-0213-5</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Riedmiller</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Neural Fitted Q Iteration – First Experiences with a Data Efficient Neural Reinforcement Learning Method</article-title>
          <source>Proceedings of the European Conference on Machine Learning</source>
          <year>2005</year>
          <conf-name>ECML'05</conf-name>
          <conf-date>October 3-7, 2005</conf-date>
          <conf-loc>Porto, Portugal</conf-loc>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1007/11564096_32"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/11564096_32</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>van Hasselt</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Guez</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Silver</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Deep Reinforcement Learning With Double Q-learning</article-title>
          <source>Proceedings of the Thirtieth AAAI Conference on Artificial Intelligence</source>
          <year>2016</year>
          <conf-name>AAAI'16</conf-name>
          <conf-date>February 12-17, 2016</conf-date>
          <conf-loc>Phoenix, Arizona, USA</conf-loc>
          <pub-id pub-id-type="doi">10.5555/3016100.3016191</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mnih</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Puigdomenech</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Mirza</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Graves</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Lillicrap</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Harley</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Silver</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Kavukcuoglu</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Asynchronous methods for deep reinforcement learning</article-title>
          <source>arXiv</source>
          <year>2016</year>
          <fpage>-</fpage>
          <comment>epub ahead of print(1602.01783)<ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1602.01783"/></comment>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Doya</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Samejima</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Katagiri</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Kawato</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Multiple model-based reinforcement learning</article-title>
          <source>Neural Comput</source>
          <year>2002</year>
          <month>06</month>
          <volume>14</volume>
          <issue>6</issue>
          <fpage>1347</fpage>
          <lpage>69</lpage>
          <pub-id pub-id-type="doi">10.1162/089976602753712972</pub-id>
          <pub-id pub-id-type="medline">12020450</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Arulkumaran</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Deisenroth</surname>
              <given-names>MP</given-names>
            </name>
            <name name-style="western">
              <surname>Brundage</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Bharath</surname>
              <given-names>AA</given-names>
            </name>
          </person-group>
          <article-title>Deep reinforcement learning: a brief survey</article-title>
          <source>IEEE Signal Process Mag</source>
          <year>2017</year>
          <month>11</month>
          <volume>34</volume>
          <issue>6</issue>
          <fpage>26</fpage>
          <lpage>38</lpage>
          <pub-id pub-id-type="doi">10.1109/MSP.2017.2743240</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="editor">
            <name name-style="western">
              <surname>Wiering</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>van Otterlo</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <source>Reinforcement Learning: State-of-the-Art</source>
          <year>2012</year>
          <publisher-loc>Berlin, Heidelberg</publisher-loc>
          <publisher-name>Springer-Verlag</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Siqi</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Reinforcement-Learning</article-title>
          <source>GitHub</source>
          <access-date>2020-01-01</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://github.com/nus-mornin-lab/Reinforcement-Learning">https://github.com/nus-mornin-lab/Reinforcement-Learning</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Borera</surname>
              <given-names>EC</given-names>
            </name>
            <name name-style="western">
              <surname>Moore</surname>
              <given-names>BL</given-names>
            </name>
            <name name-style="western">
              <surname>Doufas</surname>
              <given-names>AG</given-names>
            </name>
            <name name-style="western">
              <surname>Pyeatt</surname>
              <given-names>LD</given-names>
            </name>
          </person-group>
          <article-title>An Adaptive Neural Network Filter for Improved Patient State Estimation in Closed-Loop Anesthesia Control</article-title>
          <source>23rd International Conference on Tools with Artificial Intelligence</source>
          <year>2011</year>
          <conf-name>ICTAI'11</conf-name>
          <conf-date>November 7-9, 2011</conf-date>
          <conf-loc>Boca Raton, FL, USA</conf-loc>
          <pub-id pub-id-type="doi">10.1109/ictai.2011.15</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Padmanabhan</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Meskin</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Haddad</surname>
              <given-names>WM</given-names>
            </name>
          </person-group>
          <article-title>Closed-loop Control of Anesthesia and Mean Arterial Pressure Using Reinforcement Learning</article-title>
          <source>Symposium on Adaptive Dynamic Programming and Reinforcement Learning</source>
          <year>2014</year>
          <conf-name>ADPRL'14</conf-name>
          <conf-date>December 9-12, 2014</conf-date>
          <conf-loc>Orlando, FL, USA</conf-loc>
          <pub-id pub-id-type="doi">10.1109/ADPRL.2014.7010644</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Padmanabhan</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Meskin</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Haddad</surname>
              <given-names>WM</given-names>
            </name>
          </person-group>
          <article-title>Reinforcement Learning-Based Control for Combined Infusion of Sedatives and Analgesics</article-title>
          <source>4th International Conference on Control, Decision and Information Technologies</source>
          <year>2017</year>
          <conf-name>CoDIT'17</conf-name>
          <conf-date>April 5-7, 2017</conf-date>
          <conf-loc>Barcelona, Spain</conf-loc>
          <pub-id pub-id-type="doi">10.1109/codit.2017.8102643</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Padmanabhan</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Meskin</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Haddad</surname>
              <given-names>WM</given-names>
            </name>
          </person-group>
          <article-title>Optimal adaptive control of drug dosing using integral reinforcement learning</article-title>
          <source>Math Biosci</source>
          <year>2019</year>
          <month>03</month>
          <volume>309</volume>
          <fpage>131</fpage>
          <lpage>42</lpage>
          <pub-id pub-id-type="doi">10.1016/j.mbs.2019.01.012</pub-id>
          <pub-id pub-id-type="medline">30735696</pub-id>
          <pub-id pub-id-type="pii">S0025-5564(18)30358-4</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ghassemi</surname>
              <given-names>MM</given-names>
            </name>
            <name name-style="western">
              <surname>Alhanai</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Westover</surname>
              <given-names>MB</given-names>
            </name>
            <name name-style="western">
              <surname>Mark</surname>
              <given-names>RG</given-names>
            </name>
            <name name-style="western">
              <surname>Nemati</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Personalized Medication Dosing Using Volatile Data Streams</article-title>
          <source>Proceedings of the Thirty-Second AAAI Conference on Artificial Intelligence</source>
          <year>2018</year>
          <conf-name>AAAI'18</conf-name>
          <conf-date>February 2-7, 2018</conf-date>
          <conf-loc>New Orleans, Louisiana, USA</conf-loc>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aaai.org/ocs/index.php/WS/AAAIW18/paper/view/17234"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Stanley</surname>
              <given-names>MD</given-names>
            </name>
            <name name-style="western">
              <surname>Ghassemi</surname>
              <given-names>MM</given-names>
            </name>
            <name name-style="western">
              <surname>Nemati</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>A deep deterministic policy gradient approach to medication dosing and surveillance in the ICU</article-title>
          <source>Conf Proc IEEE Eng Med Biol Soc</source>
          <year>2018</year>
          <month>07</month>
          <volume>2018</volume>
          <fpage>4927</fpage>
          <lpage>31</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/30441448"/>
          </comment>
          <pub-id pub-id-type="doi">10.1109/EMBC.2018.8513203</pub-id>
          <pub-id pub-id-type="medline">30441448</pub-id>
          <pub-id pub-id-type="pmcid">PMC6876300</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Raghu</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Komorowski</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Ahmed</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Celi</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Szolovits</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Ghassemi</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Deep reinforcement learning for sepsis treatment</article-title>
          <source>arXiv</source>
          <year>2017</year>
          <fpage>-</fpage>
          <comment>epub ahead of print(1711.09602)<ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1711.09602"/></comment>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Raghu</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Komorowski</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Celi</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Szolovits</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Ghassemi</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Continuous state-space models for optimal sepsis treatment-a deep reinforcement learning approach</article-title>
          <source>arXiv</source>
          <year>2017</year>
          <fpage>-</fpage>
          <comment>epub ahead of print(1705.08422)<ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1705.08422"/></comment>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Raghu</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Komorowski</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Singh</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Model-based reinforcement learning for sepsis treatment</article-title>
          <source>arXiv</source>
          <year>2018</year>
          <fpage>-</fpage>
          <comment>epub ahead of print(1811.09602)<ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1811.09602"/></comment>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Futoma</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Sendak</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Bedoya</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Clement</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>O'Brien</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Heller</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Learning to Treat Sepsis with Multi-Output Gaussian Process Deep Recurrent Q-Networks</article-title>
          <source>OpenReview</source>
          <year>2018</year>
          <access-date>2020-06-10</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://openreview.net/forum?id=SyxCqGbRZ">https://openreview.net/forum?id=SyxCqGbRZ</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Peng</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Ding</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wihl</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Gottesman</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Komorowski</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Lehman</surname>
              <given-names>LH</given-names>
            </name>
            <name name-style="western">
              <surname>Ross</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Faisal</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Doshi-Velez</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Improving sepsis treatment strategies by combining deep and kernel-based reinforcement learning</article-title>
          <source>AMIA Annu Symp Proc</source>
          <year>2018</year>
          <volume>2018</volume>
          <fpage>887</fpage>
          <lpage>96</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/30815131"/>
          </comment>
          <pub-id pub-id-type="medline">30815131</pub-id>
          <pub-id pub-id-type="pmcid">PMC6371300</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>Donghun</given-names>
            </name>
            <name name-style="western">
              <surname>Srinivasan</surname>
              <given-names>Srivatsan</given-names>
            </name>
            <name name-style="western">
              <surname>Doshi-Velez</surname>
              <given-names>Finale</given-names>
            </name>
          </person-group>
          <article-title>Truly Batch Apprenticeship Learning with Deep Successor Features</article-title>
          <year>2019</year>
          <conf-name>28th International Joint Conference on Artificial Intelligence (IJCAI-19)</conf-name>
          <conf-date>August 10-16, 2019</conf-date>
          <conf-loc>Macao, China</conf-loc>
          <pub-id pub-id-type="doi">10.24963/ijcai.2019/819</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lopez-Martinez</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Eschenfeldt</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Ostvar</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Ingram</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Hur</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Picard</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Deep reinforcement learning for optimal critical care pain management with morphine using dueling double-deep Q networks</article-title>
          <source>Conf Proc IEEE Eng Med Biol Soc</source>
          <year>2019</year>
          <month>07</month>
          <volume>2019</volume>
          <fpage>3960</fpage>
          <lpage>3</lpage>
          <pub-id pub-id-type="doi">10.1109/EMBC.2019.8857295</pub-id>
          <pub-id pub-id-type="medline">31946739</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Petersen</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Grathwohl</surname>
              <given-names>WS</given-names>
            </name>
            <name name-style="western">
              <surname>Cockrell</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Santiago</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>An</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Faissol</surname>
              <given-names>DM</given-names>
            </name>
          </person-group>
          <article-title>Precision medicine as a control problem: Using simulation and deep reinforcement learning to discover adaptive, personalized multi-cytokine therapy for sepsis</article-title>
          <source>arXiv preprint. arXiv:1802.10440</source>
          <year>2018</year>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1802.10440"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Prasad</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Cheng</surname>
              <given-names>LF</given-names>
            </name>
            <name name-style="western">
              <surname>Chivers</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Draugelis</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Engelhardt</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>A reinforcement learning approach to weaning of mechanical ventilation in intensive care units</article-title>
          <source>arXiv</source>
          <year>2017</year>
          <fpage>-</fpage>
          <comment>epub ahead of print (arXiv:1704.06300)<ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1704.06300"/></comment>
        </nlm-citation>
      </ref>
      <ref id="ref44">
        <label>44</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cheng</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Prasad</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Engelhardt</surname>
              <given-names>BE</given-names>
            </name>
          </person-group>
          <article-title>An optimal policy for patient laboratory tests in intensive care units</article-title>
          <source>Pac Symp Biocomput</source>
          <year>2019</year>
          <volume>24</volume>
          <fpage>320</fpage>
          <lpage>31</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://psb.stanford.edu/psb-online/proceedings/psb19/abstracts/2019_p320.html"/>
          </comment>
          <pub-id pub-id-type="medline">30864333</pub-id>
          <pub-id pub-id-type="pii">9789813279827_0029</pub-id>
          <pub-id pub-id-type="pmcid">PMC6417830</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref45">
        <label>45</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Inverse reinforcement learning for intelligent mechanical ventilation and sedative dosing in intensive care units</article-title>
          <source>BMC Med Inform Decis Mak</source>
          <year>2019</year>
          <month>04</month>
          <day>9</day>
          <volume>19</volume>
          <issue>Suppl 2</issue>
          <fpage>57</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedinformdecismak.biomedcentral.com/articles/10.1186/s12911-019-0763-6"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12911-019-0763-6</pub-id>
          <pub-id pub-id-type="medline">30961594</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12911-019-0763-6</pub-id>
          <pub-id pub-id-type="pmcid">PMC6454602</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref46">
        <label>46</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Weng</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Gao</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>He</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Yan</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Szolovits</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Representation and reinforcement learning for personalized glycemic control in septic patients</article-title>
          <source>arXiv</source>
          <year>2017</year>
          <fpage>-</fpage>
          <comment>epub ahead of print (arXiv:1712.00654)<ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1712.00654"/></comment>
        </nlm-citation>
      </ref>
      <ref id="ref47">
        <label>47</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>He</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Zha</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Supervised Reinforcement Learning with Recurrent Neural Network for Dynamic Treatment Recommendation</article-title>
          <source>Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery &#38; Data Mining</source>
          <year>2018</year>
          <conf-name>KDD'18</conf-name>
          <conf-date>August 19-23, 2018</conf-date>
          <conf-loc>London, UK</conf-loc>
          <pub-id pub-id-type="doi">10.1145/3219819.3219961</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref48">
        <label>48</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Manrai</surname>
              <given-names>AK</given-names>
            </name>
            <name name-style="western">
              <surname>Patel</surname>
              <given-names>CJ</given-names>
            </name>
            <name name-style="western">
              <surname>Ioannidis</surname>
              <given-names>JP</given-names>
            </name>
          </person-group>
          <article-title>In the era of precision medicine and big data, who is normal?</article-title>
          <source>J Am Med Assoc</source>
          <year>2018</year>
          <month>05</month>
          <day>15</day>
          <volume>319</volume>
          <issue>19</issue>
          <fpage>1981</fpage>
          <lpage>2</lpage>
          <pub-id pub-id-type="doi">10.1001/jama.2018.2009</pub-id>
          <pub-id pub-id-type="medline">29710130</pub-id>
          <pub-id pub-id-type="pii">2679460</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref49">
        <label>49</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Johnson</surname>
              <given-names>AE</given-names>
            </name>
            <name name-style="western">
              <surname>Pollard</surname>
              <given-names>TJ</given-names>
            </name>
            <name name-style="western">
              <surname>Shen</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Lehman</surname>
              <given-names>LH</given-names>
            </name>
            <name name-style="western">
              <surname>Feng</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Ghassemi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Moody</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Szolovits</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Celi</surname>
              <given-names>LA</given-names>
            </name>
            <name name-style="western">
              <surname>Mark</surname>
              <given-names>RG</given-names>
            </name>
          </person-group>
          <article-title>MIMIC-III, a freely accessible critical care database</article-title>
          <source>Sci Data</source>
          <year>2016</year>
          <month>05</month>
          <day>24</day>
          <volume>3</volume>
          <fpage>160035</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/27219127"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/sdata.2016.35</pub-id>
          <pub-id pub-id-type="medline">27219127</pub-id>
          <pub-id pub-id-type="pii">sdata201635</pub-id>
          <pub-id pub-id-type="pmcid">PMC4878278</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref50">
        <label>50</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ng</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>CS294A Lecture Notes: Sparse Autoencoder</article-title>
          <source>Stanford University</source>
          <year>2011</year>
          <access-date>2020-06-09</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://web.stanford.edu/class/cs294a/sparseAutoencoder.pdf">https://web.stanford.edu/class/cs294a/sparseAutoencoder.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref51">
        <label>51</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kane-Gill</surname>
              <given-names>SL</given-names>
            </name>
            <name name-style="western">
              <surname>Dasta</surname>
              <given-names>JF</given-names>
            </name>
            <name name-style="western">
              <surname>Buckley</surname>
              <given-names>MS</given-names>
            </name>
            <name name-style="western">
              <surname>Devabhakthuni</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Cohen</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>George</surname>
              <given-names>EL</given-names>
            </name>
            <name name-style="western">
              <surname>Pohlman</surname>
              <given-names>AS</given-names>
            </name>
            <name name-style="western">
              <surname>Agarwal</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Henneman</surname>
              <given-names>EA</given-names>
            </name>
            <name name-style="western">
              <surname>Bejian</surname>
              <given-names>SM</given-names>
            </name>
            <name name-style="western">
              <surname>Berenholtz</surname>
              <given-names>SM</given-names>
            </name>
            <name name-style="western">
              <surname>Pepin</surname>
              <given-names>JL</given-names>
            </name>
            <name name-style="western">
              <surname>Scanlon</surname>
              <given-names>MC</given-names>
            </name>
            <name name-style="western">
              <surname>Smith</surname>
              <given-names>BS</given-names>
            </name>
          </person-group>
          <article-title>Clinical practice guideline: safe medication use in the ICU</article-title>
          <source>Crit Care Med</source>
          <year>2017</year>
          <month>09</month>
          <volume>45</volume>
          <issue>9</issue>
          <fpage>e877</fpage>
          <lpage>915</lpage>
          <pub-id pub-id-type="doi">10.1097/CCM.0000000000002533</pub-id>
          <pub-id pub-id-type="medline">28816851</pub-id>
          <pub-id pub-id-type="pii">00003246-201709000-00032</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref52">
        <label>52</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hochreiter</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Schmidhuber</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Long short-term memory</article-title>
          <source>Neural Comput</source>
          <year>1997</year>
          <month>11</month>
          <day>15</day>
          <volume>9</volume>
          <issue>8</issue>
          <fpage>1735</fpage>
          <lpage>80</lpage>
          <pub-id pub-id-type="doi">10.1162/neco.1997.9.8.1735</pub-id>
          <pub-id pub-id-type="medline">9377276</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref53">
        <label>53</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sinderby</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Breck</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Brander</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <person-group person-group-type="editor">
            <name name-style="western">
              <surname>Vincent</surname>
              <given-names>JL</given-names>
            </name>
          </person-group>
          <article-title>Bedside monitoring of diaphragm electrical activity during mechanical ventilation</article-title>
          <source>Yearbook of Intensive Care and Emergency Medicine</source>
          <year>2009</year>
          <publisher-loc>New York, NY</publisher-loc>
          <publisher-name>Springer</publisher-name>
          <fpage>385</fpage>
          <lpage>393</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref54">
        <label>54</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Patel</surname>
              <given-names>SB</given-names>
            </name>
            <name name-style="western">
              <surname>Kress</surname>
              <given-names>JP</given-names>
            </name>
          </person-group>
          <article-title>Sedation and analgesia in the mechanically ventilated patient</article-title>
          <source>Am J Respir Crit Care Med</source>
          <year>2012</year>
          <month>03</month>
          <day>1</day>
          <volume>185</volume>
          <issue>5</issue>
          <fpage>486</fpage>
          <lpage>97</lpage>
          <pub-id pub-id-type="doi">10.1164/rccm.201102-0273CI</pub-id>
          <pub-id pub-id-type="medline">22016443</pub-id>
          <pub-id pub-id-type="pii">201102-0273CI</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref55">
        <label>55</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cohen</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Shapiro</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Grozovski</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Singer</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Automatic tube compensation-assisted respiratory rate to tidal volume ratio improves the prediction of weaning outcome</article-title>
          <source>Chest</source>
          <year>2002</year>
          <month>09</month>
          <volume>122</volume>
          <issue>3</issue>
          <fpage>980</fpage>
          <lpage>4</lpage>
          <pub-id pub-id-type="doi">10.1378/chest.122.3.980</pub-id>
          <pub-id pub-id-type="medline">12226043</pub-id>
          <pub-id pub-id-type="pii">S0012-3692(16)47198-9</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref56">
        <label>56</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kashyap</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Loftsgard</surname>
              <given-names>TO</given-names>
            </name>
          </person-group>
          <article-title>Clinicians' role in reducing lab order frequency in ICU settings</article-title>
          <source>J Perioper Crit Intens Care Nurs</source>
          <year>2015</year>
          <volume>2</volume>
          <issue>1</issue>
          <pub-id pub-id-type="doi">10.4172/jpcic.1000112</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref57">
        <label>57</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Osborne</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Roberts</surname>
              <given-names>SJ</given-names>
            </name>
            <name name-style="western">
              <surname>Rogers</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Ramchurn</surname>
              <given-names>SD</given-names>
            </name>
            <name name-style="western">
              <surname>Jennings</surname>
              <given-names>NR</given-names>
            </name>
          </person-group>
          <article-title>Towards Real-Time Information Processing of Sensor Network Data Using Computationally Efficient Multi-output Gaussian Processes</article-title>
          <source>International Conference on Information Processing in Sensor Networks</source>
          <year>2008</year>
          <conf-name>IPSN'08</conf-name>
          <conf-date>April 22-24, 2008</conf-date>
          <conf-loc>St Louis, MO, USA</conf-loc>
          <pub-id pub-id-type="doi">10.1109/ipsn.2008.25</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref58">
        <label>58</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kent</surname>
              <given-names>JT</given-names>
            </name>
          </person-group>
          <article-title>Information gain and a general measure of correlation</article-title>
          <source>Biometrika</source>
          <year>1983</year>
          <month>04</month>
          <volume>70</volume>
          <issue>1</issue>
          <fpage>163</fpage>
          <lpage>73</lpage>
          <pub-id pub-id-type="doi">10.2307/2335954</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref59">
        <label>59</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Kruger</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Roberts</surname>
              <given-names>MS</given-names>
            </name>
          </person-group>
          <article-title>Optimizing drug dosing in the ICU</article-title>
          <source>Yearbook of Intensive Care and Emergency Medicine</source>
          <year>2009</year>
          <publisher-loc>New York, USA</publisher-loc>
          <publisher-name>Springer</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref60">
        <label>60</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Smith</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <person-group person-group-type="editor">
            <name name-style="western">
              <surname>Vincent</surname>
              <given-names>JL</given-names>
            </name>
          </person-group>
          <article-title>Monitoring and managing raised intracranial pressure after traumatic brain injury</article-title>
          <source>Intensive Care Medicine</source>
          <year>2009</year>
          <publisher-loc>New York, NY</publisher-loc>
          <publisher-name>Springer</publisher-name>
          <fpage>801</fpage>
          <lpage>808</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref61">
        <label>61</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Iannuzzi</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Iannuzzi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Viola</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Sidro</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Cardinale</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Chiefari</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>BIS - AAI and clinical measures during propofol target controlled infusion with Schnider's pharmacokinetic model</article-title>
          <source>Minerva Anestesiol</source>
          <year>2007</year>
          <volume>73</volume>
          <issue>1-2</issue>
          <fpage>23</fpage>
          <lpage>31</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.minervamedica.it/index2.t?show=R02Y2007N01A0023"/>
          </comment>
          <pub-id pub-id-type="medline">17115013</pub-id>
          <pub-id pub-id-type="pii">R02064799</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref62">
        <label>62</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Levi</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Emergency reversal of antithrombotic treatment</article-title>
          <source>Intern Emerg Med</source>
          <year>2009</year>
          <month>04</month>
          <volume>4</volume>
          <issue>2</issue>
          <fpage>137</fpage>
          <lpage>45</lpage>
          <pub-id pub-id-type="doi">10.1007/s11739-008-0201-8</pub-id>
          <pub-id pub-id-type="medline">19002653</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref63">
        <label>63</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Saeed</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Villarroel</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Reisner</surname>
              <given-names>AT</given-names>
            </name>
            <name name-style="western">
              <surname>Clifford</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Lehman</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Moody</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Heldt</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Kyaw</surname>
              <given-names>TH</given-names>
            </name>
            <name name-style="western">
              <surname>Moody</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Mark</surname>
              <given-names>RG</given-names>
            </name>
          </person-group>
          <article-title>Multiparameter intelligent monitoring in intensive care II: a public-access intensive care unit database</article-title>
          <source>Crit Care Med</source>
          <year>2011</year>
          <month>05</month>
          <volume>39</volume>
          <issue>5</issue>
          <fpage>952</fpage>
          <lpage>60</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/21283005"/>
          </comment>
          <pub-id pub-id-type="doi">10.1097/CCM.0b013e31820a92c6</pub-id>
          <pub-id pub-id-type="medline">21283005</pub-id>
          <pub-id pub-id-type="pmcid">PMC3124312</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref64">
        <label>64</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cohen</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Vincent</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Adhikari</surname>
              <given-names>NK</given-names>
            </name>
            <name name-style="western">
              <surname>Machado</surname>
              <given-names>FR</given-names>
            </name>
            <name name-style="western">
              <surname>Angus</surname>
              <given-names>DC</given-names>
            </name>
            <name name-style="western">
              <surname>Calandra</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Jaton</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Giulieri</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Delaloye</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Opal</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Tracey</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>van der Poll</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Pelfrene</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Sepsis: a roadmap for future research</article-title>
          <source>Lancet Infect Dis</source>
          <year>2015</year>
          <month>05</month>
          <volume>15</volume>
          <issue>5</issue>
          <fpage>581</fpage>
          <lpage>614</lpage>
          <pub-id pub-id-type="doi">10.1016/S1473-3099(15)70112-X</pub-id>
          <pub-id pub-id-type="medline">25932591</pub-id>
          <pub-id pub-id-type="pii">S1473-3099(15)70112-X</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref65">
        <label>65</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Singer</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Deutschman</surname>
              <given-names>CS</given-names>
            </name>
            <name name-style="western">
              <surname>Seymour</surname>
              <given-names>CW</given-names>
            </name>
            <name name-style="western">
              <surname>Shankar-Hari</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Annane</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Bauer</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Bellomo</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Bernard</surname>
              <given-names>GR</given-names>
            </name>
            <name name-style="western">
              <surname>Chiche</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Coopersmith</surname>
              <given-names>CM</given-names>
            </name>
            <name name-style="western">
              <surname>Hotchkiss</surname>
              <given-names>RS</given-names>
            </name>
            <name name-style="western">
              <surname>Levy</surname>
              <given-names>MM</given-names>
            </name>
            <name name-style="western">
              <surname>Marshall</surname>
              <given-names>JC</given-names>
            </name>
            <name name-style="western">
              <surname>Martin</surname>
              <given-names>GS</given-names>
            </name>
            <name name-style="western">
              <surname>Opal</surname>
              <given-names>SM</given-names>
            </name>
            <name name-style="western">
              <surname>Rubenfeld</surname>
              <given-names>GD</given-names>
            </name>
            <name name-style="western">
              <surname>van der Poll</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Vincent</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Angus</surname>
              <given-names>DC</given-names>
            </name>
          </person-group>
          <article-title>The third international consensus definitions for sepsis and septic shock (sepsis-3)</article-title>
          <source>J Am Med Assoc</source>
          <year>2016</year>
          <month>02</month>
          <day>23</day>
          <volume>315</volume>
          <issue>8</issue>
          <fpage>801</fpage>
          <lpage>10</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/26903338"/>
          </comment>
          <pub-id pub-id-type="doi">10.1001/jama.2016.0287</pub-id>
          <pub-id pub-id-type="medline">26903338</pub-id>
          <pub-id pub-id-type="pii">2492881</pub-id>
          <pub-id pub-id-type="pmcid">PMC4968574</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref66">
        <label>66</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jeter</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Josef</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Shashikumar</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Nemati</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Does the artificial intelligence clinician learn optimal treatment strategies for sepsis in intensive care?</article-title>
          <source>arXiv</source>
          <year>2019</year>
          <fpage>-</fpage>
          <comment>epub ahead of print(1902.03271)<ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1902.03271"/></comment>
        </nlm-citation>
      </ref>
      <ref id="ref67">
        <label>67</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lillicrap</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Hunt</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Pritzel</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Heess</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Erez</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Tassa</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Silver</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Wierstra</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Continuous control with deep reinforcement learning</article-title>
          <source>arXiv</source>
          <year>2015</year>
          <fpage>-</fpage>
          <comment>epub ahead of print(1509.02971)<ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1509.02971"/></comment>
        </nlm-citation>
      </ref>
      <ref id="ref68">
        <label>68</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>An</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>In silico experiments of existing and hypothetical cytokine-directed clinical trials using agent-based modeling</article-title>
          <source>Crit Care Med</source>
          <year>2004</year>
          <month>10</month>
          <volume>32</volume>
          <issue>10</issue>
          <fpage>2050</fpage>
          <lpage>60</lpage>
          <pub-id pub-id-type="doi">10.1097/01.ccm.0000139707.13729.7d</pub-id>
          <pub-id pub-id-type="medline">15483414</pub-id>
          <pub-id pub-id-type="pii">00003246-200410000-00011</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref69">
        <label>69</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Barr</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Fraser</surname>
              <given-names>GL</given-names>
            </name>
            <name name-style="western">
              <surname>Puntillo</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Ely</surname>
              <given-names>EW</given-names>
            </name>
            <name name-style="western">
              <surname>Gélinas</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Dasta</surname>
              <given-names>JF</given-names>
            </name>
            <name name-style="western">
              <surname>Davidson</surname>
              <given-names>JE</given-names>
            </name>
            <name name-style="western">
              <surname>Devlin</surname>
              <given-names>JW</given-names>
            </name>
            <name name-style="western">
              <surname>Kress</surname>
              <given-names>JP</given-names>
            </name>
            <name name-style="western">
              <surname>Joffe</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Coursin</surname>
              <given-names>DB</given-names>
            </name>
            <name name-style="western">
              <surname>Herr</surname>
              <given-names>DL</given-names>
            </name>
            <name name-style="western">
              <surname>Tung</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Robinson</surname>
              <given-names>BR</given-names>
            </name>
            <name name-style="western">
              <surname>Fontaine</surname>
              <given-names>DK</given-names>
            </name>
            <name name-style="western">
              <surname>Ramsay</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Riker</surname>
              <given-names>RR</given-names>
            </name>
            <name name-style="western">
              <surname>Sessler</surname>
              <given-names>CN</given-names>
            </name>
            <name name-style="western">
              <surname>Pun</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Skrobik</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Jaeschke</surname>
              <given-names>R</given-names>
            </name>
            <collab>American College of Critical Care Medicine</collab>
          </person-group>
          <article-title>Clinical practice guidelines for the management of pain, agitation, and delirium in adult patients in the intensive care unit</article-title>
          <source>Crit Care Med</source>
          <year>2013</year>
          <month>01</month>
          <volume>41</volume>
          <issue>1</issue>
          <fpage>263</fpage>
          <lpage>306</lpage>
          <pub-id pub-id-type="doi">10.1097/CCM.0b013e3182783b72</pub-id>
          <pub-id pub-id-type="medline">23269131</pub-id>
          <pub-id pub-id-type="pii">00003246-201301000-00029</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref70">
        <label>70</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Tang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Lai</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Su</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Gong</surname>
              <given-names>Z</given-names>
            </name>
          </person-group>
          <article-title>Continuous infusion versus intermittent bolus dosing of morphine: a comparison of analgesia, tolerance, and subsequent voluntary morphine intake</article-title>
          <source>J Psychiatr Res</source>
          <year>2014</year>
          <month>12</month>
          <volume>59</volume>
          <fpage>161</fpage>
          <lpage>6</lpage>
          <pub-id pub-id-type="doi">10.1016/j.jpsychires.2014.08.009</pub-id>
          <pub-id pub-id-type="medline">25193460</pub-id>
          <pub-id pub-id-type="pii">S0022-3956(14)00245-3</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref71">
        <label>71</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rutter</surname>
              <given-names>PC</given-names>
            </name>
            <name name-style="western">
              <surname>Murphy</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Dudley</surname>
              <given-names>HA</given-names>
            </name>
          </person-group>
          <article-title>Morphine: controlled trial of different methods of administration for postoperative pain relief</article-title>
          <source>Br Med J</source>
          <year>1980</year>
          <month>01</month>
          <day>5</day>
          <volume>280</volume>
          <issue>6206</issue>
          <fpage>12</fpage>
          <lpage>3</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/6986940"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/bmj.280.6206.12</pub-id>
          <pub-id pub-id-type="medline">6986940</pub-id>
          <pub-id pub-id-type="pmcid">PMC1600502</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref72">
        <label>72</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nagaraj</surname>
              <given-names>SB</given-names>
            </name>
            <name name-style="western">
              <surname>McClain</surname>
              <given-names>LM</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>DW</given-names>
            </name>
            <name name-style="western">
              <surname>Biswal</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Rosenthal</surname>
              <given-names>ES</given-names>
            </name>
            <name name-style="western">
              <surname>Purdon</surname>
              <given-names>PL</given-names>
            </name>
            <name name-style="western">
              <surname>Westover</surname>
              <given-names>MB</given-names>
            </name>
          </person-group>
          <article-title>Automatic classification of sedation levels in ICU patients using heart rate variability</article-title>
          <source>Critical Care Medicine</source>
          <year>2016</year>
          <volume>44</volume>
          <issue>9</issue>
          <fpage>e782</fpage>
          <lpage>9</lpage>
          <pub-id pub-id-type="doi">10.1097/ccm.0000000000001708</pub-id>
          <pub-id pub-id-type="medline">27035240</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref73">
        <label>73</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Carrasco</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Instruments for monitoring intensive care unit sedation</article-title>
          <source>Crit Care</source>
          <year>2000</year>
          <volume>4</volume>
          <issue>4</issue>
          <fpage>217</fpage>
          <lpage>25</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://ccforum.biomedcentral.com/articles/10.1186/cc697"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/cc697</pub-id>
          <pub-id pub-id-type="medline">11094504</pub-id>
          <pub-id pub-id-type="pmcid">PMC150039</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref74">
        <label>74</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lewis</surname>
              <given-names>Sharon R</given-names>
            </name>
            <name name-style="western">
              <surname>Pritchard</surname>
              <given-names>Michael W</given-names>
            </name>
            <name name-style="western">
              <surname>Fawcett</surname>
              <given-names>Lizzy J</given-names>
            </name>
            <name name-style="western">
              <surname>Punjasawadwong</surname>
              <given-names>Yodying</given-names>
            </name>
          </person-group>
          <article-title>Bispectral index for improving intraoperative awareness and early postoperative recovery in adults</article-title>
          <source>Cochrane Database Syst Rev</source>
          <year>2019</year>
          <month>09</month>
          <day>26</day>
          <volume>9</volume>
          <issue>6</issue>
          <fpage>CD003843</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/24937564"/>
          </comment>
          <pub-id pub-id-type="doi">10.1002/14651858.CD003843.pub4</pub-id>
          <pub-id pub-id-type="medline">31557307</pub-id>
          <pub-id pub-id-type="pmcid">PMC6763215</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref75">
        <label>75</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Williams</surname>
              <given-names>JD</given-names>
            </name>
            <name name-style="western">
              <surname>Young</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Partially observable Markov decision processes for spoken dialog systems</article-title>
          <source>Comput Speech Lang</source>
          <year>2007</year>
          <month>04</month>
          <volume>21</volume>
          <issue>2</issue>
          <fpage>393</fpage>
          <lpage>422</lpage>
          <pub-id pub-id-type="doi">10.1016/j.csl.2006.06.008</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref76">
        <label>76</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Maei</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Szepesvári</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Bhatnagar</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Sutton</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Toward Off-Policy Learning Control with Function Approximation</article-title>
          <source>The 27th International Conference on Machine Learning</source>
          <year>2010</year>
          <conf-name>ICML'10</conf-name>
          <conf-date>June 21-24, 2010</conf-date>
          <conf-loc>Haifa, Israel</conf-loc>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://icml.cc/Conferences/2010/papers/627.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref77">
        <label>77</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>McShea</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Holl</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Badawi</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Riker</surname>
              <given-names>RR</given-names>
            </name>
            <name name-style="western">
              <surname>Silfen</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>The eICU research institute - a collaboration between industry, health-care providers, and academia</article-title>
          <source>IEEE Eng Med Biol Mag</source>
          <year>2010</year>
          <volume>29</volume>
          <issue>2</issue>
          <fpage>18</fpage>
          <lpage>25</lpage>
          <pub-id pub-id-type="doi">10.1109/MEMB.2009.935720</pub-id>
          <pub-id pub-id-type="medline">20659837</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref78">
        <label>78</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ngiam</surname>
              <given-names>KY</given-names>
            </name>
            <name name-style="western">
              <surname>Khor</surname>
              <given-names>IW</given-names>
            </name>
          </person-group>
          <article-title>Big data and machine learning algorithms for health-care delivery</article-title>
          <source>Lancet Oncol</source>
          <year>2019</year>
          <month>05</month>
          <volume>20</volume>
          <issue>5</issue>
          <fpage>e262</fpage>
          <lpage>73</lpage>
          <pub-id pub-id-type="doi">10.1016/S1470-2045(19)30149-4</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
