<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id><journal-id journal-id-type="publisher-id">jmir</journal-id><journal-id journal-id-type="index">1</journal-id><journal-title>Journal of Medical Internet Research</journal-title><abbrev-journal-title>J Med Internet Res</abbrev-journal-title><issn pub-type="epub">1438-8871</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v27i1e63847</article-id><article-id pub-id-type="doi">10.2196/63847</article-id><article-categories><subj-group subj-group-type="heading"><subject>Original Paper</subject></subj-group></article-categories><title-group><article-title>Optimizing Vital Signs in Patients With Traumatic Brain Injury: Reinforcement Learning Algorithm Development and Validation</article-title></title-group><contrib-group><contrib contrib-type="author"><name name-style="western"><surname>Zhang</surname><given-names>Hongwei</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Diao</surname><given-names>Mengyuan</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Zhang</surname><given-names>Sheng</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name 
name-style="western"><surname>Ni</surname><given-names>Peifeng</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Zhang</surname><given-names>Weidong</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Wu</surname><given-names>Chenxi</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Zhu</surname><given-names>Ying</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Hu</surname><given-names>Wei</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib></contrib-group><aff id="aff1"><institution>Department of Critical Care Medicine, Affiliated Hangzhou First People's Hospital, School of Medicine, Westlake University</institution><addr-line>261 Huansha Road</addr-line><addr-line>Hangzhou</addr-line><country>China</country></aff><aff id="aff2"><institution>Department of Critical Care Medicine, Ruijin Hospital, Shanghai Jiao Tong University, School of Medicine</institution><addr-line>Shanghai</addr-line><country>China</country></aff><aff id="aff3"><institution>Department of Critical Care Medicine, The Fourth College of Clinical Medicine, Zhejiang Chinese Medical University</institution><addr-line>Hangzhou</addr-line><country>China</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Schwartz</surname><given-names>Amy</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Holst</surname><given-names>Anders</given-names></name></contrib><contrib contrib-type="reviewer"><name 
name-style="western"><surname>Kuo</surname><given-names>Ching-Lung</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Mengyuan Diao, PhD, Department of Critical Care Medicine, Affiliated Hangzhou First People's Hospital, School of Medicine, Westlake University, 261 Huansha Road, Hangzhou, 310006, China, 86 13634164536; <email>diaomengyuan@hospital.westlake.edu.cn</email></corresp></author-notes><pub-date pub-type="collection"><year>2025</year></pub-date><pub-date pub-type="epub"><day>3</day><month>7</month><year>2025</year></pub-date><volume>27</volume><elocation-id>e63847</elocation-id><history><date date-type="received"><day>02</day><month>07</month><year>2024</year></date><date date-type="rev-recd"><day>17</day><month>04</month><year>2025</year></date><date date-type="accepted"><day>29</day><month>04</month><year>2025</year></date></history><copyright-statement>&#x00A9; Hongwei Zhang, Mengyuan Diao, Sheng Zhang, Peifeng Ni, Weidong Zhang, Chenxi Wu, Ying Zhu, Wei Hu. Originally published in the Journal of Medical Internet Research (<ext-link ext-link-type="uri" xlink:href="https://www.jmir.org">https://www.jmir.org</ext-link>), 3.7.2025. </copyright-statement><copyright-year>2025</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research (ISSN 1438-8871), is properly cited. 
The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://www.jmir.org/">https://www.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://www.jmir.org/2025/1/e63847"/><abstract><sec><title>Background</title><p>Traumatic brain injury (TBI) is a critical illness with a high mortality rate, and clinical treatment is committed to continuously optimizing treatment strategies to improve survival rates.</p></sec><sec><title>Objective</title><p>This study aims to establish a reinforcement learning (RL) algorithm to optimize the survival prognosis decision-making scheme for patients with TBI in the intensive care unit.</p></sec><sec sec-type="methods"><title>Methods</title><p>We included a total of 2745 patients from the Medical Information Mart for Intensive Care (MIMIC)&#x2013;IV database and randomly divided them into a training set and an internal validation set at an 8:2 ratio. We extracted 34 features for analysis and modeling using a 2-hour time step, 2 action features (mean arterial pressure and temperature), and 1 outcome feature (survival status at 28 d). We used an RL algorithm called weighted dueling double deep Q-network with embedded human expertise to maximize cumulative returns and evaluated the model using a doubly robust off-policy evaluation method. Finally, we collected 2463 patients with TBI from MIMIC-III as an external validation set to test the model.</p></sec><sec sec-type="results"><title>Results</title><p>The action features are divided into 6 intervals, and the expected benefits are estimated using a doubly robust off-policy evaluation method. 
The results indicate that the survival rate of artificial intelligence (AI) strategies is higher than that of clinical doctors (88.016%, 95% CI 85.191%&#x2010;90.840% vs 81.094%, 95% CI 80.422%&#x2010;81.765%), with an expected return of 28.978 (95% CI 28.797&#x2010;29.160) versus 27.092 (95% CI 24.584&#x2010;29.600). Compared with clinical doctors, AI algorithms select normal temperatures more frequently (36.56 &#x00B0;C to 36.83 &#x00B0;C) and recommend mean arterial pressure levels of 87.5&#x2010;95.0 mm Hg. In external validation, the AI strategy still has a high survival rate of 87.565%, with an expected return of 27.517.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>This RL algorithm for patients with TBI indicates that a more personalized and targeted optimization of the vital signs is possible. This algorithm will assist clinicians in making decisions on an individualized patient-by-patient basis.</p></sec></abstract><kwd-group><kwd>traumatic brain injury</kwd><kwd>reinforcement learning</kwd><kwd>temperature</kwd><kwd>mean arterial pressure</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>Traumatic brain injury (TBI) constitutes a major cause of mortality and morbidity among patients with trauma, with an estimated 69 million new cases globally each year. In the United States, data from the Centers for Disease Control and Prevention reported 288,000 TBI-related hospitalizations in 2014, resulting in 56,800 fatalities [<xref ref-type="bibr" rid="ref1">1</xref>]. In China, the mortality rate attributable to TBI stands at approximately 13 per 100,000 individuals [<xref ref-type="bibr" rid="ref2">2</xref>], while in France and South Africa, the rates are 5.2 and 80.73 per 100,000, respectively [<xref ref-type="bibr" rid="ref3">3</xref>]. TBI encompasses a spectrum of clinical presentations, from transient concussions to profound comas. 
Management of TBI is multifaceted, encompassing prehospital care, emergency department interventions, surgical procedures, and intensive care unit (ICU) management. ICU treatment specifically involves comprehensive strategies including hemodynamic monitoring, intracranial pressure (ICP) control, temperature regulation, thromboprophylaxis, gastrointestinal ulcer prevention, seizure prophylaxis, and nutritional support [<xref ref-type="bibr" rid="ref4">4</xref>]. Ongoing research is essential to refine and optimize ICU management protocols for patients with TBI.</p><p>Machine learning (ML) and artificial intelligence (AI) hold significant potential for advancing clinical decision-making processes. Prior research has demonstrated the utility of ML in various domains, including the management of sepsis [<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref6">6</xref>], weaning from acute respiratory distress syndrome [<xref ref-type="bibr" rid="ref7">7</xref>], and optimizing mechanical ventilation settings [<xref ref-type="bibr" rid="ref8">8</xref>,<xref ref-type="bibr" rid="ref9">9</xref>]. The aim of reinforcement learning (RL) is to use interactions with an environment to find actions that maximize reward. Guided trial-and-error search and delayed feedback are key aspects that distinguish RL from other ML methods [<xref ref-type="bibr" rid="ref10">10</xref>]. Compared with other ML methods, advantages of RL include strong self-learning, high adaptability, long-term reward optimization, and sequential decision-making. RL addresses the balance between exploration and exploitation through its reward mechanism. RL adjusts its policy to learn the optimal action in the context of sparse feedback, avoiding heavy reliance on labeled data. This advantage is particularly evident in task-oriented dialogue systems or complex task management, where RL can learn to respond to indirect or delayed feedback. 
RL holds immense potential in the ICU, particularly in personalized treatment, real-time decision-making, and complex monitoring tasks. It can assist physicians in formulating and optimizing treatment plans, improving the quality and safety of critical care for patients [<xref ref-type="bibr" rid="ref11">11</xref>]. Patients in the ICU often present with complex and rapidly changing conditions, and standardized treatment protocols may not meet individualized needs. RL models can learn from historical data about how different patients respond to treatments, allowing for dynamic adjustments to the treatment plan and the creation of personalized strategies for each patient.</p><p>Despite its promise, there is a paucity of AI research specifically targeting the management of TBI in the ICU. To address this gap, we used a novel algorithm, the weighted dueling double deep Q-network with embedded human expertise (WD3QNE), to optimize treatment strategies for TBI. The WD3QNE algorithm advances traditional double deep Q-networks with dueling networks and dueling deep Q-network methods by incorporating a target Q-value function with adaptive dynamic weights, thereby enhancing estimation accuracy [<xref ref-type="bibr" rid="ref12">12</xref>]. Furthermore, it integrates clinical expertise to improve the performance of RL in clinical decision-making.</p></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Data Sources and Data Processing</title><p>The study cohort comprised patients with TBI aged 18 years and older, as identified in the Medical Information Mart for Intensive Care (MIMIC)&#x2013;IV database using <italic>International Classification of Diseases&#x200C;</italic>, <italic>Ninth Revision</italic>, codes 800, 801, 803, 804, 850&#x2010;854, and <italic>International Classification of Diseases&#x200C;</italic>, <italic>Tenth Revision</italic>, code S06. 
Only the first ICU admission for each patient was included, with the dataset divided into a training set (80%) and an internal validation set (20%). External validation was performed using data from the MIMIC-III database. Collected patient variables included age, Glasgow Coma Scale, Systemic Inflammatory Response Syndrome, Sequential Organ Failure Assessment (SOFA) score, heart rate, respiratory rate, FiO2, pCO2, SpO2, pO2, pO2/FiO2 ratio, temperature, mean arterial pressure (MAP), white blood cell count, hemoglobin, platelet count, activated partial thromboplastin time, prothrombin time, international normalized ratio, glucose, total bilirubin, lactate, creatinine, aspartate aminotransferase, alanine aminotransferase, blood urea nitrogen, pH, base excess, bands, potassium, sodium, calcium, magnesium, chloride, bicarbonate, and urine output. Data were collected for the first 72 hours post-ICU admission, with a time step of 2 hours. For multiple measurements within a 2-hour interval, the mean value was used, except for the Glasgow Coma Scale (minimum value) and urine output (summed). Missing data were initially forward-filled, and remaining gaps were imputed using the k-nearest neighbors algorithm. Continuous variables were normalized to a range of &#x2212;1 to 1. The experiments were conducted in a Python 3.9 environment using the PyTorch framework. All computations were performed on a personal computer equipped with a 2.60 GHz Intel Core i5-11400F CPU and 16 GB of RAM.</p></sec><sec id="s2-2"><title>Ethical Considerations</title><p>MIMIC data have undergone strict deidentification processing by the Massachusetts Institute of Technology, so no patient information can be traced back to an individual. Moreover, the Institutional Review Board of the Massachusetts Institute of Technology waived the requirement for informed consent from patients for MIMIC data collection. 
Researchers MD and HZ have completed the CITI Program ethical examination certification on the PhysioNet platform, submitted an application through PhysioNet, and signed a data usage agreement (certification numbers 1630201 and 13402134).</p></sec><sec id="s2-3"><title>RL Algorithm Overview</title><sec id="s2-3-1"><title>Markov Decision Process</title><p>We simulated the health trajectories and clinical decisions of patients in the ICU using a Markov decision process (MDP), which is a continuous interaction process between the agent and the environment. By defining 5 elements&#x2014;state space (<italic>S</italic>), action space (<italic>A</italic>), state transition probability (<italic>P</italic>), reward function (<italic>R</italic>), and discount factor (<italic>&#x03B3;</italic>)&#x2014;the interaction process between the agent and the environment is transformed into a computable model [<xref ref-type="bibr" rid="ref13">13</xref>]. In 2010, Alagoz et al [<xref ref-type="bibr" rid="ref14">14</xref>] proposed using MDP to solve the problem of sequential clinical treatment under uncertainty, and subsequently more researchers have focused on clinical decision analysis in RL [<xref ref-type="bibr" rid="ref15">15</xref>].</p><p>The specific analysis process is as follows: the intelligent agent selects action <italic>A<sub>t</sub></italic> based on the current state <italic>S<sub>t</sub></italic>; for state <italic>S<sub>t</sub></italic> and action <italic>A<sub>t</sub></italic>, the MDP obtains the reward <italic>R<sub>t</sub></italic> and the next state <italic>S<sub>t+1</sub></italic> based on the reward function and state transition function, and feeds them back to the agent. The goal of an intelligent agent is to maximize the accumulated reward obtained. The function by which an intelligent agent selects an action from the set of actions <italic>A</italic> based on its current state is called strategy <italic>&#x03C0;</italic>. 
The strategy <italic>&#x03C0;</italic>(<italic>a|s</italic>)<italic>=P</italic>(<italic>A<sub>t</sub>=a|S<sub>t</sub>=s</italic>) is a function that represents the probability of taking action &#x201C;<italic>a</italic>&#x201D; after the input state &#x201C;<italic>s</italic>&#x201D;. When a strategy is a stochastic policy, it outputs a probability distribution of actions in each state, and then samples based on this distribution to obtain an action. In MDP, the state value function <italic>V<sup>&#x03C0;</sup></italic>(<italic>s</italic>) based on policy <italic>&#x03C0;</italic> is the expected return that can be obtained by following policy <italic>&#x03C0;</italic> starting from state &#x201C;<italic>s</italic>&#x201D;. In addition, due to the existence of actions, an additional action value function <italic>Q<sup>&#x03C0;</sup></italic>(<italic>s,a</italic>) based on policy <italic>&#x03C0;</italic> is defined to represent the expected return obtained by performing action &#x201C;<italic>a</italic>&#x201D; in the current state &#x201C;<italic>s</italic>&#x201D; when MDP follows policy <italic>&#x03C0;</italic>. The discount factor <italic>&#x03B3;</italic> takes values in the range [0,1]. The reason for introducing a discount factor is that future rewards have a certain degree of uncertainty and sometimes we prefer to obtain some rewards as soon as possible, so future rewards need to be discounted. <italic>&#x03B3;</italic> values close to 1 focus more on long-term cumulative rewards, while <italic>&#x03B3;</italic> values close to 0 focus on short-term rewards. The details of MDP in this study can be found in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p></sec><sec id="s2-3-2"><title>Computational Model</title><sec id="s2-3-2-1"><title>Overview</title><p>Patient states encompassed demographic information, vital signs, and laboratory results recorded at each time point. 
The actions involved adjustments to temperature and MAP. An RL agent, functioning as an AI clinician, made decisions based on the patient&#x2019;s current state, determining the appropriate temperature and MAP control ranges. Following the implementation of these adjustments, patients transitioned to subsequent states, and the agent received reward feedback, guiding future decisions. To optimize cumulative rewards, we utilized the WD3QNE algorithm. WD3QNE develops a scoring system to assess recommended temperature and MAP ranges based on patient health states, enhancing decision quality through iterative score improvement. This algorithm is adept at managing sparse and delayed reward signals, making it particularly effective in addressing patient heterogeneity in treatment responses and delayed treatment efficacy indicators. Such a framework supports adaptive medical decision-making systems, accommodating a variety of patient scenarios, even in the presence of discontinuous reward signals or delayed clinical responses, thereby optimizing treatment strategies. The specifics of the states, actions, and rewards are delineated as given in the following sections.</p></sec><sec id="s2-3-2-2"><title>State</title><p>The state space comprises consolidated patient clinical characteristics, with the SOFA score excluded from the state space but utilized as an intermediate reward during the training phase.</p></sec><sec id="s2-3-2-3"><title>Action</title><p>Interventions for temperature and MAP are administered every 2 hours. A 6x6 action matrix is used, where temperature and MAP measurements at each time point are converted into integers corresponding to their respective sextiles, thereby defining the action space.</p></sec><sec id="s2-3-2-4"><title>Reward</title><p>The primary focus of our reward system is patient survival, with rewards assessed after a sequence of clinical decisions. 
Additionally, we incorporate intermediate rewards, reflecting changes in the SOFA score, and final rewards based on survival status at 28 days.</p><disp-formula id="equWL1"><mml:math id="eqn1"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mi>r</mml:mi><mml:mrow><mml:mtext>&#x00A0;</mml:mtext></mml:mrow><mml:mo>=</mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:mtable columnalign="left left" rowspacing=".2em" columnspacing="1em" displaystyle="false"><mml:mtr><mml:mtd><mml:msub><mml:mi>&#x03B2;</mml:mi><mml:mrow><mml:mi>s</mml:mi></mml:mrow></mml:msub><mml:mo>&#x00D7;</mml:mo><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mi>S</mml:mi><mml:mi>O</mml:mi><mml:mi>F</mml:mi><mml:msub><mml:mi>A</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mo>+</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>&#x2212;</mml:mo><mml:mi>S</mml:mi><mml:mi>O</mml:mi><mml:mi>F</mml:mi><mml:msub><mml:mi>A</mml:mi><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mi>t</mml:mi><mml:mo>&#x003C;</mml:mo><mml:mi>T</mml:mi></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:msub><mml:mi>R</mml:mi><mml:mrow><mml:mi>T</mml:mi></mml:mrow></mml:msub><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mi>t</mml:mi><mml:mo>=</mml:mo><mml:mi>T</mml:mi></mml:mtd></mml:mtr></mml:mtable><mml:mo fence="true" stretchy="true" 
symmetric="true"/></mml:mrow><mml:mrow><mml:mtext>&#x00A0;</mml:mtext></mml:mrow></mml:mrow></mml:mstyle></mml:math></disp-formula><p><italic>R<sub>T</sub></italic> represents patient survival as 50 or death as &#x2212;50. The reward parameter <inline-formula><mml:math id="ieqn1"><mml:msub><mml:mrow><mml:mi>&#x03B2;</mml:mi></mml:mrow><mml:mrow><mml:mi>s</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> is set to &#x2212;0.3.</p></sec><sec id="s2-3-2-5"><title>Discount Factor</title><p>This parameter balances the consideration of future long-term rewards against immediate rewards in the RL model, with possible values ranging from 0 to 1. We selected a discount factor of 0.99, indicating that late-stage mortality is given nearly equal importance as early-stage mortality in the decision-making process.</p></sec></sec></sec><sec id="s2-4"><title>Clinician and Agent Policy Construct</title><p>Clinician policy is defined as the continuous clinical decision-making trajectory reconstructed based on real-world electronic health record data, with its core features focusing on the management of target intervals for body temperature and MAP, presenting the following modeling characteristics:</p><list list-type="order"><list-item><p>Modeling of the action space: the temperature and MAP regulation strategies are discretized into a 6&#x00D7;6 gridded action space.</p></list-item><list-item><p>Temporal decision dynamics: with a decision interval unit of 2 hours, the state-action pair mapping relationship is constructed through the real-time temperature, MAP target values, and corresponding physiological indicators (such as heart rate and lactate level) recorded in the electronic health record.</p></list-item><list-item><p>Integration mechanism of background interventions: explicit exclusions-other treatment measures are not included in the RL action space. 
Implicit coupling paths-the physiological effects of background interventions are dynamically reflected through multi-dimensional state vectors (such as white blood cell count), and the cumulative impact of background treatments on organ functions is systematically captured through the design of SOFA score and the end-point mortality in the composite reward function.</p></list-item></list><p>AI policy is a bivariate joint optimization strategy for temperature and MAP generated through deep RL, featuring the following:</p><list list-type="order"><list-item><p>Policy consistency: strictly reusing the 6&#x00D7;6 discrete action space of clinician policy.</p></list-item><list-item><p>Differentiated optimization objectives: maximizing the reward function based on survival end points and dynamic changes in SOFA by regulating the temperature-MAP combination.</p></list-item><list-item><p>Boundaries of clinical adaptability: first, the AI strategy does not change the existing treatment framework but only provides dynamic adjustment suggestions for the management of temperature and MAP. Second, other treatment measures, as uncontrollable covariates, have their effects continuously transmitted to the next decision cycle through the Markovian assumption of state observables. Third, the final conclusion regarding survival benefits is strictly limited to the additional benefits obtained through refined management of body temperature and blood pressure within the context of current standard treatments.</p></list-item></list></sec><sec id="s2-5"><title>Off-Policy Evaluation</title><p>In model evaluation, the effectiveness of the AI-derived policy is assessed by comparing it to health state trajectories generated by human clinicians. We use a doubly robust off-policy value evaluation method, which integrates importance sampling with an approximation of the MDP, to compute unbiased estimates for each trajectory. 
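The calculation formula is as follows:</p><disp-formula><mml:math><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:msub><mml:mi>V</mml:mi><mml:mtext>DR</mml:mtext></mml:msub><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>S</mml:mi><mml:mi>t</mml:mi></mml:msub><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:mrow><mml:mover><mml:mi>V</mml:mi><mml:mo>^</mml:mo></mml:mover></mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>S</mml:mi><mml:mi>t</mml:mi></mml:msub><mml:mo stretchy="false">)</mml:mo><mml:mo>+</mml:mo><mml:mi>&#x03C1;</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mi>r</mml:mi><mml:mi>t</mml:mi></mml:msub><mml:mo>+</mml:mo><mml:mi>&#x03B3;</mml:mi><mml:msub><mml:mi>V</mml:mi><mml:mtext>DR</mml:mtext></mml:msub><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>S</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mo>+</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo stretchy="false">)</mml:mo><mml:mo>&#x2212;</mml:mo><mml:mrow><mml:mover><mml:mi>Q</mml:mi><mml:mo>^</mml:mo></mml:mover></mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>S</mml:mi><mml:mi>t</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>a</mml:mi><mml:mi>t</mml:mi></mml:msub><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:mstyle></mml:math></disp-formula><p>Where <italic>V</italic><sub>DR</sub>(<italic>S<sub>t</sub></italic>) is the doubly robust estimate of the policy value at state <italic>S<sub>t</sub></italic>, and <italic>&#x03C1;</italic> represents the importance ratio between the AI policy &#x03C0;<sub>1</sub> and the clinician policy <inline-formula><mml:math id="ieqn2"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:msub><mml:mi>&#x03C0;</mml:mi><mml:mn>0</mml:mn></mml:msub><mml:mtext>&#x00A0;</mml:mtext><mml:mo>:</mml:mo><mml:mtext>&#x00A0;</mml:mtext><mml:mi>&#x03C1;</mml:mi><mml:mo>=</mml:mo><mml:mfrac><mml:msub><mml:mi>&#x03C0;</mml:mi><mml:mn>1</mml:mn></mml:msub><mml:msub><mml:mi>&#x03C0;</mml:mi><mml:mn>0</mml:mn></mml:msub></mml:mfrac></mml:mrow></mml:mstyle></mml:math></inline-formula>. <inline-formula><mml:math><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mrow><mml:mover><mml:mi>V</mml:mi><mml:mo>^</mml:mo></mml:mover></mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>S</mml:mi><mml:mi>t</mml:mi></mml:msub><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mstyle></mml:math></inline-formula> is the estimated state value. <inline-formula><mml:math id="ieqn3"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mrow><mml:mover><mml:mi>Q</mml:mi><mml:mo>^</mml:mo></mml:mover></mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>S</mml:mi><mml:mi>t</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>a</mml:mi><mml:mi>t</mml:mi></mml:msub><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mstyle></mml:math></inline-formula> is the expected return of taking action <italic>a<sub>t</sub></italic> in state <italic>S<sub>t</sub></italic>.</p><p>To further evaluate the survival rate of the policy, we applied an on-policy State-Action-Reward-State-Action RL algorithm to establish the relationship between expected return and survival rate: <inline-formula><mml:math id="ieqn4"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mi>Q</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>S</mml:mi><mml:mi>t</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>a</mml:mi><mml:mi>t</mml:mi></mml:msub><mml:mo stretchy="false">)</mml:mo><mml:mo stretchy="false">&#x2190;</mml:mo><mml:mi>Q</mml:mi><mml:mo 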
stretchy="false">(</mml:mo><mml:msub><mml:mi>S</mml:mi><mml:mi>t</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>a</mml:mi><mml:mi>t</mml:mi></mml:msub><mml:mo stretchy="false">)</mml:mo><mml:mo>+</mml:mo><mml:mi>&#x03B1;</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mi>r</mml:mi><mml:mo>+</mml:mo><mml:mi>&#x03B3;</mml:mi><mml:mi>Q</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>S</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mo>+</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>a</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mo>+</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo stretchy="false">)</mml:mo><mml:mo>&#x2212;</mml:mo><mml:mi>Q</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>S</mml:mi><mml:mi>t</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>a</mml:mi><mml:mi>t</mml:mi></mml:msub><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:mstyle></mml:math></inline-formula>. First, the expected return value <italic>V</italic> is computed. Then, based on the return value, we calculate the average survival rate. 
The survival formula is as follows:</p><disp-formula id="equWL2"><mml:math id="eqn2"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mi>S</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:msub><mml:mi>Q</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mi>s</mml:mi><mml:mi>u</mml:mi><mml:msub><mml:mi>r</mml:mi><mml:mrow><mml:msub><mml:mi>V</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msub></mml:mrow><mml:mrow><mml:mi>t</mml:mi><mml:mi>a</mml:mi><mml:msub><mml:mi>l</mml:mi><mml:mrow><mml:msub><mml:mi>V</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msub></mml:mrow></mml:mfrac></mml:mrow></mml:mstyle></mml:math></disp-formula><p>where <inline-formula><mml:math id="ieqn5"><mml:msub><mml:mrow><mml:mi>s</mml:mi><mml:mi>u</mml:mi><mml:mi>r</mml:mi></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>V</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msub></mml:math></inline-formula> represents the number of survivors, and <inline-formula><mml:math id="ieqn6"><mml:msub><mml:mrow><mml:mi>t</mml:mi><mml:mi>a</mml:mi><mml:mi>l</mml:mi></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>V</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msub></mml:math></inline-formula> represents the total population given the expected return <italic>V<sub>i</sub></italic>. <italic>V<sub>i</sub></italic> is an integer and <inline-formula><mml:math id="ieqn7"><mml:msub><mml:mrow><mml:mi>V</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2208;</mml:mo><mml:msub><mml:mrow><mml:mi>V</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula>. The relationship between expected return and survival rate is illustrated in <xref ref-type="fig" rid="figure1">Figure 1</xref>. 
The survival rate is positively correlated with the expected return.</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>The relationship between expected return and survival rate.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="jmir_v27i1e63847_fig01.png"/></fig></sec></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title>Model Performance</title><p>The data analysis process is illustrated in <xref ref-type="fig" rid="figure2">Figure 2</xref>. We included a total of 2745 patients from the MIMIC-IV database, who were randomly allocated into training (n=2198) and internal validation (n=547) cohorts in an 8:2 ratio. For external validation, 2463 patients with TBI were sourced from the MIMIC-III database. The clinical characteristics of the study population are detailed in <xref ref-type="table" rid="table1">Table 1</xref>.</p><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>Flowchart of analysis. 
MAP: mean arterial pressure; MIMIC: Medical Information Mart for Intensive Care; SOFA: Sequential Organ Failure Assessment; TBI: traumatic brain injury.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="jmir_v27i1e63847_fig02.png"/></fig><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Baseline characteristics of the study population.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom"/><td align="left" valign="bottom">Train (n=2198)</td><td align="left" valign="bottom">Internal validation (n=547)</td><td align="left" valign="bottom">External validation (n=2463)</td></tr></thead><tbody><tr><td align="left" valign="top">Age (years), mean (SD)</td><td align="left" valign="top">62.7 (22.2)</td><td align="left" valign="top">63.0 (21.6)</td><td align="left" valign="top">75.1 (67.3)</td></tr><tr><td align="left" valign="top">Sex (male), n (%)</td><td align="left" valign="top">1362 (62.0)</td><td align="left" valign="top">343 (62.7)</td><td align="left" valign="top">1518 (61.6)</td></tr><tr><td align="left" valign="top">Length of hospital stay (days), mean (SD)</td><td align="left" valign="top">9.12 (10.8)</td><td align="left" valign="top">9.40 (12.4)</td><td align="left" valign="top">9.42 (11.0)</td></tr><tr><td align="left" valign="top">Length of ICU<sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup> stay (days), mean (SD)</td><td align="left" valign="top">3.84 (5.49)</td><td align="left" valign="top">4.09 (5.53)</td><td align="left" valign="top">4.37 (5.84)</td></tr><tr><td align="left" valign="top">Hospital mortality, n (%)</td><td align="left" valign="top">284 (12.9)</td><td align="left" valign="top">68 (12.4)</td><td align="left" valign="top">395 (16.0)</td></tr><tr><td align="left" valign="top">28-day mortality, n (%)</td><td align="left" valign="top">362 (16.5)</td><td align="left" valign="top">92 (16.8)</td><td align="left" 
valign="top">446 (18.1)</td></tr></tbody></table><table-wrap-foot><fn id="table1fn1"><p><sup>a</sup>ICU: intensive care unit.</p></fn></table-wrap-foot></table-wrap><p>Initially, we used the MIMIC-IV dataset to estimate the expected return using a doubly robust off-policy evaluation method. Our findings reveal that the survival rate under the AI policy surpasses that under the clinician policy. Specifically, the survival rate with the AI policy is 88.016% (95% CI 85.191%&#x2010;90.840%), with an expected return of 28.978 (95% CI 28.797&#x2010;29.160). In contrast, the survival rate under the clinician policy is 81.094% (95% CI 80.422%&#x2010;81.765%), with an expected return of 27.092 (95% CI 24.584&#x2010;29.600). Furthermore, <xref ref-type="fig" rid="figure3">Figure 3</xref> illustrates the expected return across epochs, showing convergence and stabilization around a reward value of 29.</p><fig position="float" id="figure3"><label>Figure 3.</label><caption><p>Expected return of algorithm at each learning epoch.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="jmir_v27i1e63847_fig03.png"/></fig></sec><sec id="s3-2"><title>Model Strategy</title><p>The frequency distribution of the optimal AI strategy was compared with that of clinicians, with detailed analyses conducted for temperature and MAP adjustments, as presented in <xref ref-type="table" rid="table2">Tables 2</xref> and <xref ref-type="table" rid="table3">3</xref>, and <xref ref-type="fig" rid="figure4">Figures 4</xref> and <xref ref-type="fig" rid="figure5">5</xref>. The AI algorithm more frequently selected normal temperature ranges (36.56 &#x00B0;C to 36.83 &#x00B0;C), with an increase of 291.52% compared with clinicians, while selecting fewer temperatures in the ranges of &#x003C;36.56 &#x00B0;C and &#x003E;37.72 &#x00B0;C. 
Additionally, the AI recommended MAP levels of 87.5&#x2010;95.0 mm Hg, 498.46% more often than clinicians.</p><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Distribution of the chosen action by artificial intelligence in comparison to the clinicians&#x2019; performance.</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom"/><td align="left" valign="bottom">Range 1</td><td align="left" valign="bottom">Range 2</td><td align="left" valign="bottom">Range 3</td><td align="left" valign="bottom">Range 4</td><td align="left" valign="bottom">Range 5</td><td align="left" valign="bottom">Range 6</td></tr></thead><tbody><tr><td align="left" valign="top">Temperature (&#x2103;)</td><td align="left" valign="top">&#x003C;36.56</td><td align="left" valign="top">36.56&#x2010;36.83</td><td align="left" valign="top">36.83&#x2010;37.06</td><td align="left" valign="top">37.06&#x2010;37.33</td><td align="left" valign="top">37.33&#x2010;37.72</td><td align="left" valign="top">&#x003E;37.72</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Difference, n (%)</td><td align="left" valign="top">&#x2212;1837 (&#x2212;14.05)</td><td align="left" valign="top">9075 (69.41)</td><td align="left" valign="top">&#x2212;1753 (&#x2212;13.41)</td><td align="left" valign="top">&#x2212;2299 (&#x2212;17.58)</td><td align="left" valign="top">&#x2212;1465 (&#x2212;11.2)</td><td align="left" valign="top">&#x2212;1721 (&#x2212;13.16)</td></tr><tr><td align="left" valign="top">MAP<sup><xref ref-type="table-fn" rid="table2fn1">a</xref></sup> (mm Hg)</td><td align="left" valign="top">&#x003C;70.0</td><td align="left" valign="top">70.0&#x2010;76.0</td><td align="left" valign="top">76.0&#x2010;81.58</td><td align="left" valign="top">81.58&#x2010;87.5</td><td align="left" valign="top">87.5&#x2010;95.0</td><td align="left" valign="top">&#x003E;95.0</td></tr><tr><td 
align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Difference, n (%)</td><td align="left" valign="top">&#x2212;2242 (&#x2212;17.15)</td><td align="left" valign="top">&#x2212;2230 (&#x2212;17.06)</td><td align="left" valign="top">&#x2212;2006 (&#x2212;15.34)</td><td align="left" valign="top">&#x2212;2109 (&#x2212;16.13)</td><td align="left" valign="top">10,383 (79.41)</td><td align="left" valign="top">&#x2212;1796 (&#x2212;13.74)</td></tr></tbody></table><table-wrap-foot><fn id="table2fn1"><p><sup>a</sup>MAP: mean arterial pressure.</p></fn></table-wrap-foot></table-wrap><table-wrap id="t3" position="float"><label>Table 3.</label><caption><p>Comparison of percentage of change for each action bin between artificial intelligence policy and clinician policy.</p></caption><table id="table3" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom"/><td align="left" valign="bottom">Range 1</td><td align="left" valign="bottom">Range 2</td><td align="left" valign="bottom">Range 3</td><td align="left" valign="bottom">Range 4</td><td align="left" valign="bottom">Range 5</td><td align="left" valign="bottom">Range 6</td></tr></thead><tbody><tr><td align="left" valign="top">Temperature (&#x2103;)</td><td align="left" valign="top">&#x003C;36.56</td><td align="left" valign="top">36.56&#x2010;36.83</td><td align="left" valign="top">36.83&#x2010;37.06</td><td align="left" valign="top">37.06&#x2010;37.33</td><td align="left" valign="top">37.33&#x2010;37.72</td><td align="left" valign="top">&#x003E;37.72</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Difference, n (%)</td><td align="left" valign="top">&#x2212;1837 (&#x2212;100)</td><td align="left" valign="top">9075 (291.52)</td><td align="left" valign="top">&#x2212;1753 (&#x2212;75.07)</td><td align="left" valign="top">&#x2212;2299 (&#x2212;97.21)</td><td align="left" 
valign="top">&#x2212;1465 (&#x2212;85.97)</td><td align="left" valign="top">&#x2212;1721 (&#x2212;100)</td></tr><tr><td align="left" valign="top">MAP<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup> (mm Hg)</td><td align="left" valign="top">&#x003C;70.0</td><td align="left" valign="top">70.0&#x2010;76.0</td><td align="left" valign="top">76.0&#x2010;81.58</td><td align="left" valign="top">81.58&#x2010;87.5</td><td align="left" valign="top">87.5&#x2010;95.0</td><td align="left" valign="top">&#x003E;95.0</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Difference, n (%)</td><td align="left" valign="top">&#x2212;2242 (&#x2212;96.35)</td><td align="left" valign="top">&#x2212;2230 (&#x2212;96.96)</td><td align="left" valign="top">&#x2212;2006 (&#x2212;93.22)</td><td align="left" valign="top">&#x2212;2109 (&#x2212;93.4)</td><td align="left" valign="top">10,383 (498.46)</td><td align="left" valign="top">&#x2212;1796 (&#x2212;91.87)</td></tr></tbody></table><table-wrap-foot><fn id="table3fn1"><p><sup>a</sup>MAP: mean arterial pressure.</p></fn></table-wrap-foot></table-wrap><fig position="float" id="figure4"><label>Figure 4.</label><caption><p>Visualization of the action distribution. The test set includes 13,075 decision time instances and the designed model facilitates 16 action bins in the action space. (A) Temperature (&#x2103;) and (B) MAP (mm Hg). MAP: mean arterial pressure.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="jmir_v27i1e63847_fig04.png"/></fig><fig position="float" id="figure5"><label>Figure 5.</label><caption><p>Number of action changes. The relative number of action changes (temperature and MAP) is shown in relation to the number of patients with traumatic brain injury at each 2-hour time step. 
Clinicians&#x2019; action changes are shown in blue while the artificial intelligence action changes are shown in orange. (A) Temperature (&#x2103;) and (B) MAP (mm Hg). MAP: mean arterial pressure.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="jmir_v27i1e63847_fig05.png"/></fig><p>Further analysis of the number of action changes made by the AI within each 2-hour time step over a 72-hour period revealed a consistently lower frequency of adjustments compared with clinicians (<xref ref-type="fig" rid="figure5">Figure 5</xref>), indicating the AI algorithm&#x2019;s preference for stable vital sign management. Feature importance for temperature and MAP changes was assessed using an out-of-bag analysis with random forests (<xref ref-type="table" rid="table4">Table 4</xref>). The top 5 clinical features influencing optimal temperature and MAP selections were age, heart rate, pO2, hemoglobin, and lactate. The importance weights for temperature and MAP adjustments were found to be equal.</p><table-wrap id="t4" position="float"><label>Table 4.</label><caption><p>Out-of-bag feature weight analysis of artificial intelligence. 
The relative weight of each feature using out-of-bag feature weight analysis, based on the relative loss of prediction, represented by an increase of the mean squared error.</p></caption><table id="table4" frame="hsides" rules="groups"><thead><tr><td align="left" valign="top">Variables and features</td><td align="left" valign="top">Importance</td></tr></thead><tbody><tr><td align="left" valign="top" colspan="2">Temperature (&#x2103;)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Age (years)</td><td align="left" valign="top">0.072</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Heart rate</td><td align="left" valign="top">0.068</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>SIRS<sup><xref ref-type="table-fn" rid="table4fn1">a</xref></sup></td><td align="left" valign="top">0.062</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>WBC<sup><xref ref-type="table-fn" rid="table4fn2">b</xref></sup></td><td align="left" valign="top">0.045</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Respiratory rate</td><td align="left" valign="top">0.040</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>PH<sup><xref ref-type="table-fn" rid="table4fn3">c</xref></sup></td><td align="left" valign="top">0.037</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>aPTT<sup><xref ref-type="table-fn" rid="table4fn4">d</xref></sup></td><td align="left" valign="top">0.035</td></tr><tr><td align="left" valign="top"><named-content 
content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>PO2</td><td align="left" valign="top">0.034</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Calcium</td><td align="left" valign="top">0.034</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Magnesium</td><td align="left" valign="top">0.033</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Platelet</td><td align="left" valign="top">0.031</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>PT<sup><xref ref-type="table-fn" rid="table4fn5">e</xref></sup></td><td align="left" valign="top">0.030</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>SpO2</td><td align="left" valign="top">0.030</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Glucose</td><td align="left" valign="top">0.029</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Lactate</td><td align="left" valign="top">0.029</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>PCO2</td><td align="left" valign="top">0.028</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Chloride</td><td align="left" valign="top">0.027</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Bicarbonate</td><td align="left" valign="top">0.027</td></tr><tr><td align="left" valign="top"><named-content 
content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Creatinine</td><td align="left" valign="top">0.024</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Bilirubin total</td><td align="left" valign="top">0.024</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Hemoglobin</td><td align="left" valign="top">0.024</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>PaO2/FiO2</td><td align="left" valign="top">0.023</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>FiO2</td><td align="left" valign="top">0.023</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Potassium</td><td align="left" valign="top">0.023</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Sodium</td><td align="left" valign="top">0.023</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>AST<sup><xref ref-type="table-fn" rid="table4fn6">f</xref></sup></td><td align="left" valign="top">0.023</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Urine output</td><td align="left" valign="top">0.022</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>INR<sup><xref ref-type="table-fn" rid="table4fn7">g</xref></sup></td><td align="left" valign="top">0.021</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Base excess</td><td align="left" 
valign="top">0.021</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Bands</td><td align="left" valign="top">0.018</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>BUN<sup><xref ref-type="table-fn" rid="table4fn8">h</xref></sup></td><td align="left" valign="top">0.017</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>GCS<sup><xref ref-type="table-fn" rid="table4fn9">i</xref></sup></td><td align="left" valign="top">0.013</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>ALT<sup><xref ref-type="table-fn" rid="table4fn10">j</xref></sup></td><td align="left" valign="top">0.010</td></tr><tr><td align="left" valign="top" colspan="2">MAP<sup><xref ref-type="table-fn" rid="table4fn11">k</xref></sup> (mm Hg)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Age (years)</td><td align="left" valign="top">0.068</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Heart rate</td><td align="left" valign="top">0.066</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>PO2</td><td align="left" valign="top">0.051</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Hemoglobin</td><td align="left" valign="top">0.050</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Lactate</td><td align="left" valign="top">0.043</td></tr><tr><td align="left" valign="top"><named-content 
content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Urine output</td><td align="left" valign="top">0.035</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Platelet</td><td align="left" valign="top">0.034</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>AST</td><td align="left" valign="top">0.034</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>SpO2</td><td align="left" valign="top">0.033</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>aPTT</td><td align="left" valign="top">0.033</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Calcium</td><td align="left" valign="top">0.033</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>WBC</td><td align="left" valign="top">0.033</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Magnesium</td><td align="left" valign="top">0.032</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Respiratory rate</td><td align="left" valign="top">0.032</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Creatinine</td><td align="left" valign="top">0.031</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Glucose</td><td align="left" valign="top">0.031</td></tr><tr><td align="left" valign="top"><named-content 
content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>PCO2</td><td align="left" valign="top">0.029</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>PT</td><td align="left" valign="top">0.029</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>INR</td><td align="left" valign="top">0.029</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>PH</td><td align="left" valign="top">0.029</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>PaO2/FiO2</td><td align="left" valign="top">0.028</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>FiO2</td><td align="left" valign="top">0.027</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Bilirubin total</td><td align="left" valign="top">0.027</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Chloride</td><td align="left" valign="top">0.024</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Base excess</td><td align="left" valign="top">0.023</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Potassium</td><td align="left" valign="top">0.021</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Bicarbonate</td><td align="left" valign="top">0.021</td></tr><tr><td align="left" valign="top"><named-content 
content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Sodium</td><td align="left" valign="top">0.020</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>BUN</td><td align="left" valign="top">0.019</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Bands</td><td align="left" valign="top">0.017</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>GCS</td><td align="left" valign="top">0.010</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>ALT</td><td align="left" valign="top">0.008</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>SIRS</td><td align="left" valign="top">0.005</td></tr></tbody></table><table-wrap-foot><fn id="table4fn1"><p><sup>a</sup>SIRS: Systemic Inflammatory Response Syndrome.</p></fn><fn id="table4fn2"><p><sup>b</sup>WBC: white blood cell.</p></fn><fn id="table4fn3"><p><sup>c</sup>PH: potential of Hydrogen.</p></fn><fn id="table4fn4"><p><sup>d</sup>aPTT: activated partial thromboplastin time.</p></fn><fn id="table4fn5"><p><sup>e</sup>PT: prothrombin time.</p></fn><fn id="table4fn6"><p><sup>f</sup>AST: aspartate aminotransferase.</p></fn><fn id="table4fn7"><p><sup>g</sup>INR: international normalized ratio.</p></fn><fn id="table4fn8"><p><sup>h</sup>BUN: blood urea nitrogen.</p></fn><fn id="table4fn9"><p><sup>i</sup>GCS: Glasgow Coma Scale.</p></fn><fn id="table4fn10"><p><sup>j</sup>ALT: alanine aminotransferase.</p></fn><fn id="table4fn11"><p><sup>k</sup>MAP: mean arterial pressure.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s3-3"><title>External Validation</title><p>External validation was conducted using data from the MIMIC-III database, which 
included a total of 2463 patients with TBI. The results show that the survival rate under the AI policy is 87.565% (95% CI 86.158%&#x2010;88.972%), with an expected return of 27.517 (95% CI 27.431&#x2010;27.603).</p></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Principal Findings</title><p>We used the WD3QNE algorithm to develop an optimal 28-day survival strategy for patients in the ICU with TBI. This AI algorithm prioritizes the management of MAP and temperature to derive the optimal value function. The AI algorithm demonstrated improved 28-day survival rates in both the internal and external validation datasets.</p><p>The WD3QNE algorithm builds upon the foundations of the double deep Q-networks with dueling networks and dueling deep Q-network RL algorithms by incorporating a target Q-value function with adaptive dynamic weights, enhancing estimation accuracy, accelerating convergence, and improving stability. By integrating clinical expertise, the WD3QNE algorithm further enhances decision-making performance (<xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>). In this AI model, intermediate rewards are implemented to expedite the ML process by providing more frequent feedback. The SOFA score was selected for intermediate rewards due to its strong association with patient severity and mortality rates in the ICU [<xref ref-type="bibr" rid="ref16">16</xref>,<xref ref-type="bibr" rid="ref17">17</xref>]. Unlike lactate, lactate clearance rate, and base excess, the SOFA score uniquely differentiates between survival and death at the time of admission [<xref ref-type="bibr" rid="ref18">18</xref>].</p><p>In this study, the AI algorithm prioritizes specific temperature (36.56 &#x00B0;C to 36.83 &#x00B0;C) and MAP (87.5&#x2010;95.0 mm Hg) action intervals, resulting in improved outcomes and higher survival rates. 
Effective TBI management focuses on preventing hypotension and hypoxia and maintaining appropriate cerebral perfusion to mitigate secondary brain injury [<xref ref-type="bibr" rid="ref4">4</xref>]. MAP&#x2014;an easily obtainable and routinely monitored parameter&#x2014;was used as a key indicator. Compared with systolic blood pressure (SBP), MAP better reflects cerebral perfusion pressure (CPP; CPP=MAP&#x2212;ICP). In pediatric TBI, lower MAP has been found to predict adverse outcomes (AUC=0.75) [<xref ref-type="bibr" rid="ref19">19</xref>]. Lower MAP and high SBP variability are associated with increased mortality in brain injury patients [<xref ref-type="bibr" rid="ref20">20</xref>]. While fewer clinical studies focus on MAP, research on SBP suggests maintaining SBP above critical thresholds is beneficial. Recent guidelines recommend keeping SBP above 100 mm Hg for patients with TBI aged 50&#x2010;69 years, and above 110 mm Hg for those aged 15&#x2010;49 years or over 70 years [<xref ref-type="bibr" rid="ref21">21</xref>-<xref ref-type="bibr" rid="ref23">23</xref>]. These findings align with our AI algorithm&#x2019;s preference for a higher MAP range of 87.5&#x2010;95.0 mm Hg, which may benefit patients with TBI by ensuring better cerebral perfusion. Changes in MAP influence cerebral blood vessel constriction and dilation, thereby affecting ICP. Increasing MAP can reduce ICP and help control intracranial pressure [<xref ref-type="bibr" rid="ref24">24</xref>]. In severe TBI cases where the autoregulatory function is lost, ICP trends consistently with MAP changes [<xref ref-type="bibr" rid="ref25">25</xref>]. Thus, hypotension below the range of cerebral vascular autoregulation leads to hypoperfusion, secondary cerebral ischemia, and hypoxia. Elevating blood pressure within the autoregulatory range, or in patients with TBI with impaired autoregulation, can optimize cerebral blood flow under cerebral blood flow monitoring. 
This underscores the importance of adjusting pressure to improve cerebral blood flow and suggests further exploration into optimizing oxygen metabolism in patients with TBI. Patients with TBI are subject to a cascade of pathological alterations, including secondary brain edema, mitochondrial dysfunction, calcium overload, and inflammatory responses [<xref ref-type="bibr" rid="ref26">26</xref>].</p><p>In the ICU, in addition to analgesia, sedation, and mechanical ventilation to balance oxygen supply and demand, temperature management remains a critical component. Preclinical studies have demonstrated that hypothermia can substantially reduce neuronal cell death and mitigate brain ischemia-reperfusion injury [<xref ref-type="bibr" rid="ref27">27</xref>-<xref ref-type="bibr" rid="ref29">29</xref>]. However, clinical trial outcomes regarding hypothermia&#x2019;s impact on the prognosis of patients with brain injury have been inconsistent [<xref ref-type="bibr" rid="ref30">30</xref>-<xref ref-type="bibr" rid="ref34">34</xref>]. These inconsistencies arise from significant variations in patient populations, temperature ranges, durations of hypothermia, and follow-up periods, underscoring the need for more rigorous research [<xref ref-type="bibr" rid="ref35">35</xref>]. The latest fourth edition guidelines for TBI specify that early (within 2.5 h postinjury), short-term (within 48 h postinjury) prophylactic hypothermia is not recommended to enhance outcomes in patients with diffuse brain injury [<xref ref-type="bibr" rid="ref36">36</xref>]. In contrast to the ongoing debate regarding hypothermia (&#x003C;36 &#x00B0;C) treatment for patients with TBI, fever (&#x003E;38 &#x00B0;C) is recognized as an adverse prognostic factor affecting TBI outcomes [<xref ref-type="bibr" rid="ref37">37</xref>]. Most of the patients with TBI experience fever, which can lead to increased intracranial pressure, cerebral metabolism, and exacerbation of ischemic injury. 
The study by Birg et al [<xref ref-type="bibr" rid="ref38">38</xref>] confirmed that when brain temperature exceeds 37.5 &#x00B0;C, intracranial pressure increases, resulting in decreased cerebral perfusion pressure. The investigation by Puccio et al [<xref ref-type="bibr" rid="ref39">39</xref>] further validated that intravascular cooling can ameliorate intracranial hypertension and mitigate secondary injuries. Consequently, the Seattle International Severe Traumatic Brain Injury Consensus Conference guidelines advocate for fever control in patients with TBI. In instances where primary and secondary interventions for intracranial hypertension prove ineffective, hypothermia treatment (35 &#x00B0;C&#x2010;36 &#x00B0;C) may be considered [<xref ref-type="bibr" rid="ref40">40</xref>]. In 2024, an expert consensus on TBI temperature management was specifically proposed. Experts believe that controlling the body temperature within 36.0 &#x00B0;C&#x2010;37.5 &#x00B0;C is the basis for managing patients with TBI [<xref ref-type="bibr" rid="ref41">41</xref>]. In the present study, the AI algorithm exhibits a propensity towards the lower temperature range of 36.5 &#x00B0;C&#x2010;36.8 &#x00B0;C. Compared with both lower and higher temperature ranges, this interval mitigates the risk of arrhythmias, compromised circulation, and coagulation abnormalities induced by hypothermia, without exacerbating intracranial pressure and cerebral oxygen metabolism disturbances. Finally, results from the out-of-bag analysis underscore age, heart rate, pO2, hemoglobin, and lactate as the primary state variables influencing the AI&#x2019;s selection of optimal MAP and temperature.</p><p>TBI is a complex condition and it is likely that no single factor entirely accounts for the disease outcome. 
Currently, there is active exploration of novel methods to enhance TBI monitoring, diagnosis, and assessment, particularly through the identification of new biomarkers for brain injury [<xref ref-type="bibr" rid="ref42">42</xref>]. In the field of neurocritical care, clinicians have begun to use multimodal monitoring approaches, utilizing various invasive or noninvasive methods. Treatment strategies are adjusted by measuring distinct cerebral physiological parameters (primarily cerebral blood flow, metabolism, and oxygenation), with temperature and blood pressure emerging as the most critical and readily observable influencing factors. A 2021 meta-analysis of multiple TBI management guidelines revealed that guideline implementation correlates with improved prognoses [<xref ref-type="bibr" rid="ref43">43</xref>]. However, as evidence grading standards have become increasingly stringent, the number of strong recommendations has progressively diminished, resulting in reduced clinical decision-making support [<xref ref-type="bibr" rid="ref44">44</xref>]. While there remains no unified opinion regarding therapeutic hypothermia for patients with cerebral herniation or severe intracranial hypertension, even when implementing hypothermia protocols, experts recommend targeting near-normal temperature ranges. Blood pressure management involves multiple variables including vascular volume, central venous pressure, and vascular tone, with individual therapeutic responses significantly impacting cerebral perfusion [<xref ref-type="bibr" rid="ref45">45</xref>]. Consequently, the urgent need for precision medicine in TBI treatment has become particularly evident.</p><p>RL emphasizes exploration and exploitation, and its core is the dynamic change of strategies and values. In this study, we dynamically weight the behavioral differences between the clinician strategy and the AI strategy using the importance ratio (&#x03C1;). 
The core idea is to jointly offset the confounding effects through inverse probability weighting (IPW) and the outcome model. In addition, we also adopt a SOFA-stratified human-machine collaborative strategy. For mild patients with SOFA &#x003C;5, the historical decision-making data of clinicians are directly used as the Q-value function. This is equivalent to decoupling the effect of real-world doctor interventions (do-action) from the evolution of potential complications, avoiding new confounders introduced by AI due to wrong interventions in low-risk scenarios. For severe patients with SOFA &#x2265;5, we completely rely on the RL strategy. Here, it is assumed that the dynamic changes of complications play a stronger leading role in the evolution of the endogenous state of the human body than the intervention effect, so AI is allowed to explore freely. In addition, the state vector of our MDP includes dynamic physiological indicators (such as lactate level) closely related to complications, and these indicators can be used as proxy variables for complications to reduce unobserved confounders. For example, lactate has been proven to be an early warning indicator of shock and is continuously tracked in the state-update cycle (once every 2 h).</p><p>It should be noted that the MDP model cannot completely simulate the real world. To some extent, the MDP model is a simplified version of the real world. There may be the following deviations between the 2: for instance, the MDP assumes perfect observation and modeling of state transitions following medical interventions, whereas real-world scenarios involve measurement errors and undocumented latent factors (eg, genetic variations, pharmacokinetic differences). Additionally, the model presumes flawless execution of therapeutic actions, while practical implementations may encounter instrumentation inaccuracies. 
To mitigate these discrepancies, the doubly robust evaluation corrects 2 sources of error through a hybrid mechanism of IPW and adjustment of model fitting residuals. First, IPW is used to quantify the probability ratio of AI strategy actions to doctor strategy actions (to address the difference in action frequencies of different strategies); second, residual compensation uses the real survival status of patients to correct prediction errors (to address the inherent bias of the MDP state transition model). For example, for the historical record of patient A, if the doctor actually used &#x201C;low temperature&#x201D; (and the patient survived), while the AI suggested &#x201C;high temperature&#x201D; (with an increased predicted survival probability in the MDP), we would calculate both the importance weight of the &#x201C;high temperature&#x201D; action (based on the difference between the AI and doctor strategies) and the adjusted survival rate estimate (the 88% survival rate has been corrected by this method, rather than being the direct output of the original MDP). Constrained by ethical and privacy regulations, we are temporarily unable to directly apply AI strategies to real patients. In the next step of our plan, we are going to carry out &#x201C;AI-assisted decision-making&#x201D; (only for doctors&#x2019; reference) among a small number of critically ill patients to gradually accumulate actual efficacy data.</p><p>The management of patients with TBI in the ICU is a continuous and dynamic process. Clinicians in the ICU are inundated with extensive clinical data, necessitating timely and rational decision-making, a process that poses significant challenges. In contrast, RL algorithms dynamically adjust actions, such as MAP and temperature, by interacting with the environment to maximize cumulative rewards. 
By integrating clinical expertise and adaptive weights into the Q-value function, our algorithm enhances performance and optimizes the 28-day survival rate of patients, demonstrating clear practicality and applicability in clinical settings, particularly in the absence of multimodal monitoring. However, this study has several limitations. First, our sample is derived solely from the MIMIC database in the United States, lacking external validation from other regions. Second, the AI strategy focuses exclusively on key actions related to MAP and temperature, omitting other critical variables such as trauma factors, surgical interventions, and ventilator settings. Third, RL agents must learn from limited data and intervention variations collected offline. Using trial and error to explore all possible situations may conflict with medical ethics, limiting the ability of RL agents to try new behaviors to discover those with higher rewards and better long-term outcomes [<xref ref-type="bibr" rid="ref11">11</xref>]. As a result, this AI strategy may not represent the optimal solution for reducing TBI mortality rates. Further research is required to incorporate and validate additional relevant variables and parameters to refine and improve the AI strategy.</p><p>To mitigate potential overfitting in the RL model, we implemented several measures during the study. For instance, continuous variables were normalized, and an external validation dataset (MIMIC-III) was used to enhance and test the model&#x2019;s generalization ability. However, we acknowledge that there may still be residual overfitting risks. In future research, we will further optimize the model. On one hand, we plan to collect data from more diverse sources and types, including patients from different medical institutions and regions, to enrich the diversity of training data and mitigate the impact of data bias on the model. 
On the other hand, we will explore ways to improve the model architecture, such as incorporating more advanced regularization techniques to prevent the agent from over-relying on specific patterns in the training data and enhance its adaptability to new data and complex clinical scenarios. Additionally, we will conduct in-depth analyses of the RL agent&#x2019;s behavior across patients with different characteristics, performing sensitivity analyses to gain a deeper understanding of the model&#x2019;s decision-making process. This approach will help identify potential overfitting risks and guide targeted improvements. By addressing these aspects, we aim to develop a more robust and generalizable model for clinical decision support. The main algorithm of this study has been uploaded to <xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref>.</p></sec><sec id="s4-2"><title>Conclusions</title><p>In summary, we used a novel RL algorithm to enhance the 28-day survival rate of patients with TBI in the ICU. This algorithm showed superior performance across the training set, validation set, and external validation, with AI-driven decision-making resulting in higher survival rates compared with clinician-directed care. This RL algorithm for patients with TBI indicates that a more personalized and targeted optimization of the vital signs is possible. It will assist clinicians in making decisions on an individualized patient-by-patient basis.</p></sec></sec></body><back><ack><p>We appreciate the support from the Science and Technology Development Project of Hangzhou (grant 202204A10), Zhejiang Provincial Medical and Health Technology Project (grant WKJ-ZJ-2315), Medical and Health Technology Project of Hangzhou (grant Z20220026), and Construction Fund of Medical Key Disciplines of Hangzhou (grant OO20200485).</p></ack><fn-group><fn fn-type="con"><p>HZ contributed to the original draft writing and performed data analysis. 
SZ was responsible for data analysis and visualization. MD contributed to the conceptualization and methodology and was involved in reviewing and editing the manuscript. WH was responsible for funding acquisition, conceptualization, and supervision. YZ provided supervision and contributed to reviewing and editing the manuscript. WZ, PN, and CW contributed to writing the original draft.</p></fn><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">AI</term><def><p>artificial intelligence</p></def></def-item><def-item><term id="abb2">ICP</term><def><p>intracranial pressure</p></def></def-item><def-item><term id="abb3">ICU</term><def><p>intensive care unit</p></def></def-item><def-item><term id="abb4">IPW</term><def><p>inverse probability weighting</p></def></def-item><def-item><term id="abb5">MAP</term><def><p>mean arterial pressure</p></def></def-item><def-item><term id="abb6">MDP</term><def><p>Markov decision process</p></def></def-item><def-item><term id="abb7">MIMIC</term><def><p>Medical Information Mart for Intensive Care</p></def></def-item><def-item><term id="abb8">ML</term><def><p>machine learning</p></def></def-item><def-item><term id="abb9">RL</term><def><p>reinforcement learning</p></def></def-item><def-item><term id="abb10">SOFA</term><def><p>Sequential Organ Failure Assessment</p></def></def-item><def-item><term id="abb11">TBI</term><def><p>traumatic brain injury</p></def></def-item><def-item><term id="abb12">WD3QNE</term><def><p>weighted dueling double deep Q-network with embedded human expertise</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Capizzi</surname><given-names>A</given-names> </name><name name-style="western"><surname>Woo</surname><given-names>J</given-names> </name><name 
name-style="western"><surname>Verduzco-Gutierrez</surname><given-names>M</given-names> </name></person-group><article-title>Traumatic brain injury</article-title><source>Med Clin North Am</source><year>2020</year><month>03</month><volume>104</volume><issue>2</issue><fpage>213</fpage><lpage>238</lpage><pub-id pub-id-type="doi">10.1016/j.mcna.2019.11.001</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Jiang</surname><given-names>JY</given-names> </name><name name-style="western"><surname>Gao</surname><given-names>GY</given-names> </name><name name-style="western"><surname>Feng</surname><given-names>JF</given-names> </name><etal/></person-group><article-title>Traumatic brain injury in China</article-title><source>Lancet Neurol</source><year>2019</year><month>03</month><volume>18</volume><issue>3</issue><fpage>286</fpage><lpage>295</lpage><pub-id pub-id-type="doi">10.1016/S1474-4422(18)30469-1</pub-id><pub-id pub-id-type="medline">30784557</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lulla</surname><given-names>A</given-names> </name><name name-style="western"><surname>Lumba-Brown</surname><given-names>A</given-names> </name><name name-style="western"><surname>Totten</surname><given-names>AM</given-names> </name><etal/></person-group><article-title>Prehospital guidelines for the management of traumatic brain injury - 3rd edition</article-title><source>Prehosp Emerg Care</source><year>2023</year><volume>27</volume><issue>5</issue><fpage>507</fpage><lpage>538</lpage><pub-id pub-id-type="doi">10.1080/10903127.2023.2187905</pub-id><pub-id pub-id-type="medline">37079803</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Vella</surname><given-names>MA</given-names> </name><name name-style="western"><surname>Crandall</surname><given-names>ML</given-names> </name><name name-style="western"><surname>Patel</surname><given-names>MB</given-names> </name></person-group><article-title>Acute management of traumatic brain injury</article-title><source>Surg Clin North Am</source><year>2017</year><month>10</month><volume>97</volume><issue>5</issue><fpage>1015</fpage><lpage>1030</lpage><pub-id pub-id-type="doi">10.1016/j.suc.2017.06.003</pub-id><pub-id pub-id-type="medline">28958355</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Su</surname><given-names>L</given-names> </name><name name-style="western"><surname>Li</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Liu</surname><given-names>S</given-names> </name><etal/></person-group><article-title>Establishment and implementation of potential fluid therapy balance strategies for ICU sepsis patients based on reinforcement learning</article-title><source>Front Med (Lausanne)</source><year>2022</year><volume>9</volume><fpage>766447</fpage><pub-id pub-id-type="doi">10.3389/fmed.2022.766447</pub-id><pub-id pub-id-type="medline">35492326</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Komorowski</surname><given-names>M</given-names> </name><name name-style="western"><surname>Celi</surname><given-names>LA</given-names> </name><name name-style="western"><surname>Badawi</surname><given-names>O</given-names> </name><name name-style="western"><surname>Gordon</surname><given-names>AC</given-names> </name><name name-style="western"><surname>Faisal</surname><given-names>AA</given-names> </name></person-group><article-title>The artificial 
intelligence clinician learns optimal treatment strategies for sepsis in intensive care</article-title><source>Nat Med</source><year>2018</year><month>11</month><volume>24</volume><issue>11</issue><fpage>1716</fpage><lpage>1720</lpage><pub-id pub-id-type="doi">10.1038/s41591-018-0213-5</pub-id><pub-id pub-id-type="medline">30349085</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Stivi</surname><given-names>T</given-names> </name><name name-style="western"><surname>Padawer</surname><given-names>D</given-names> </name><name name-style="western"><surname>Dirini</surname><given-names>N</given-names> </name><name name-style="western"><surname>Nachshon</surname><given-names>A</given-names> </name><name name-style="western"><surname>Batzofin</surname><given-names>BM</given-names> </name><name name-style="western"><surname>Ledot</surname><given-names>S</given-names> </name></person-group><article-title>Using artificial intelligence to predict mechanical ventilation weaning success in patients with respiratory failure, including those with acute respiratory distress syndrome</article-title><source>J Clin Med</source><year>2024</year><month>03</month><day>5</day><volume>13</volume><issue>5</issue><fpage>1505</fpage><pub-id pub-id-type="doi">10.3390/jcm13051505</pub-id><pub-id pub-id-type="medline">38592696</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Peine</surname><given-names>A</given-names> </name><name name-style="western"><surname>Hallawa</surname><given-names>A</given-names> </name><name name-style="western"><surname>Bickenbach</surname><given-names>J</given-names> </name><etal/></person-group><article-title>Development and validation of a reinforcement learning algorithm to dynamically optimize mechanical 
ventilation in critical care</article-title><source>NPJ Digit Med</source><year>2021</year><month>02</month><day>19</day><volume>4</volume><issue>1</issue><fpage>32</fpage><pub-id pub-id-type="doi">10.1038/s41746-021-00388-6</pub-id><pub-id pub-id-type="medline">33608661</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>den Hengst</surname><given-names>F</given-names> </name><name name-style="western"><surname>Otten</surname><given-names>M</given-names> </name><name name-style="western"><surname>Elbers</surname><given-names>P</given-names> </name><name name-style="western"><surname>van Harmelen</surname><given-names>F</given-names> </name><name name-style="western"><surname>Fran&#x00E7;ois-Lavet</surname><given-names>V</given-names> </name><name name-style="western"><surname>Hoogendoorn</surname><given-names>M</given-names> </name></person-group><article-title>Guideline-informed reinforcement learning for mechanical ventilation in critical care</article-title><source>Artif Intell Med</source><year>2024</year><month>01</month><volume>147</volume><fpage>102742</fpage><pub-id pub-id-type="doi">10.1016/j.artmed.2023.102742</pub-id><pub-id pub-id-type="medline">38184349</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Nian</surname><given-names>R</given-names> </name><name name-style="western"><surname>Liu</surname><given-names>J</given-names> </name><name name-style="western"><surname>Huang</surname><given-names>B</given-names> </name></person-group><article-title>A review on reinforcement learning: introduction and applications in industrial process control</article-title><source>Comput Chem Eng</source><year>2020</year><month>08</month><volume>139</volume><fpage>106886</fpage><pub-id 
pub-id-type="doi">10.1016/j.compchemeng.2020.106886</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Liu</surname><given-names>S</given-names> </name><name name-style="western"><surname>See</surname><given-names>KC</given-names> </name><name name-style="western"><surname>Ngiam</surname><given-names>KY</given-names> </name><name name-style="western"><surname>Celi</surname><given-names>LA</given-names> </name><name name-style="western"><surname>Sun</surname><given-names>X</given-names> </name><name name-style="western"><surname>Feng</surname><given-names>M</given-names> </name></person-group><article-title>Reinforcement learning for clinical decision support in critical care: comprehensive review</article-title><source>J Med Internet Res</source><year>2020</year><month>07</month><day>20</day><volume>22</volume><issue>7</issue><fpage>e18477</fpage><pub-id pub-id-type="doi">10.2196/18477</pub-id><pub-id pub-id-type="medline">32706670</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wu</surname><given-names>X</given-names> </name><name name-style="western"><surname>Li</surname><given-names>R</given-names> </name><name name-style="western"><surname>He</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Yu</surname><given-names>T</given-names> </name><name name-style="western"><surname>Cheng</surname><given-names>C</given-names> </name></person-group><article-title>A value-based deep reinforcement learning model with human expertise in optimal treatment of sepsis</article-title><source>NPJ Digit Med</source><year>2023</year><month>02</month><day>2</day><volume>6</volume><issue>1</issue><fpage>15</fpage><pub-id pub-id-type="doi">10.1038/s41746-023-00755-5</pub-id><pub-id 
pub-id-type="medline">36732666</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Puterman</surname><given-names>ML</given-names> </name></person-group><article-title>Markov decision processes</article-title><source>Handbooks in Operations Research and Management Science</source><year>1990</year><publisher-name>Elsevier</publisher-name><fpage>331</fpage><lpage>434</lpage><pub-id pub-id-type="doi">10.1016/S0927-0507(05)80172-0</pub-id><pub-id pub-id-type="other">978-0-444-87473-3</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Alagoz</surname><given-names>O</given-names> </name><name name-style="western"><surname>Hsu</surname><given-names>H</given-names> </name><name name-style="western"><surname>Schaefer</surname><given-names>AJ</given-names> </name><name name-style="western"><surname>Roberts</surname><given-names>MS</given-names> </name></person-group><article-title>Markov decision processes: a tool for sequential decision making under uncertainty</article-title><source>Med Decis Making</source><year>2010</year><volume>30</volume><issue>4</issue><fpage>474</fpage><lpage>483</lpage><pub-id pub-id-type="doi">10.1177/0272989X09353194</pub-id><pub-id pub-id-type="medline">20044582</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Coronato</surname><given-names>A</given-names> </name><name name-style="western"><surname>Naeem</surname><given-names>M</given-names> </name><name name-style="western"><surname>De Pietro</surname><given-names>G</given-names> </name><name name-style="western"><surname>Paragliola</surname><given-names>G</given-names> </name></person-group><article-title>Reinforcement 
learning for intelligent healthcare applications: a survey</article-title><source>Artif Intell Med</source><year>2020</year><month>09</month><volume>109</volume><fpage>101964</fpage><pub-id pub-id-type="doi">10.1016/j.artmed.2020.101964</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Roepke</surname><given-names>RML</given-names> </name><name name-style="western"><surname>Besen</surname><given-names>B</given-names> </name><name name-style="western"><surname>Daltro-Oliveira</surname><given-names>R</given-names> </name><etal/></person-group><article-title>Predictive performance for hospital mortality of SAPS 3, SOFA, ISS, and new ISS in critically ill trauma patients: a validation cohort study</article-title><source>J Intensive Care Med</source><year>2024</year><month>01</month><volume>39</volume><issue>1</issue><fpage>44</fpage><lpage>51</lpage><pub-id pub-id-type="doi">10.1177/08850666231188051</pub-id><pub-id pub-id-type="medline">37448331</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ferreira</surname><given-names>FL</given-names> </name><name name-style="western"><surname>Bota</surname><given-names>DP</given-names> </name><name name-style="western"><surname>Bross</surname><given-names>A</given-names> </name><name name-style="western"><surname>M&#x00E9;lot</surname><given-names>C</given-names> </name><name name-style="western"><surname>Vincent</surname><given-names>JL</given-names> </name></person-group><article-title>Serial evaluation of the SOFA score to predict outcome in critically ill patients</article-title><source>JAMA</source><year>2001</year><month>10</month><day>10</day><volume>286</volume><issue>14</issue><fpage>1754</fpage><lpage>1758</lpage><pub-id 
pub-id-type="doi">10.1001/jama.286.14.1754</pub-id><pub-id pub-id-type="medline">11594901</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>D&#x00FC;bendorfer</surname><given-names>C</given-names> </name><name name-style="western"><surname>Billeter</surname><given-names>AT</given-names> </name><name name-style="western"><surname>Seifert</surname><given-names>B</given-names> </name><name name-style="western"><surname>Keel</surname><given-names>M</given-names> </name><name name-style="western"><surname>Turina</surname><given-names>M</given-names> </name></person-group><article-title>Serial lactate and admission SOFA scores in trauma: an analysis of predictive value in 724 patients with and without traumatic brain injury</article-title><source>Eur J Trauma Emerg Surg</source><year>2013</year><month>02</month><volume>39</volume><issue>1</issue><fpage>25</fpage><lpage>34</lpage><pub-id pub-id-type="doi">10.1007/s00068-012-0212-z</pub-id><pub-id pub-id-type="medline">26814920</pub-id></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Erickson</surname><given-names>SL</given-names> </name><name name-style="western"><surname>Killien</surname><given-names>EY</given-names> </name><name name-style="western"><surname>Wainwright</surname><given-names>M</given-names> </name><name name-style="western"><surname>Mills</surname><given-names>B</given-names> </name><name name-style="western"><surname>Vavilala</surname><given-names>MS</given-names> </name></person-group><article-title>Mean arterial pressure and discharge outcomes in severe pediatric traumatic brain injury</article-title><source>Neurocrit Care</source><year>2021</year><month>06</month><volume>34</volume><issue>3</issue><fpage>1017</fpage><lpage>1025</lpage><pub-id 
pub-id-type="doi">10.1007/s12028-020-01121-z</pub-id><pub-id pub-id-type="medline">33108627</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>de Havenon</surname><given-names>A</given-names> </name><name name-style="western"><surname>Petersen</surname><given-names>NH</given-names> </name><name name-style="western"><surname>Stulberg</surname><given-names>EL</given-names> </name><name name-style="western"><surname>Anadani</surname><given-names>M</given-names> </name><name name-style="western"><surname>Biffi</surname><given-names>A</given-names> </name><name name-style="western"><surname>Sheth</surname><given-names>KN</given-names> </name></person-group><article-title>Interaction of mean arterial pressure and blood pressure variability in critically ill brain injured patients</article-title><source>Stroke</source><year>2022</year><month>12</month><volume>53</volume><issue>12</issue><fpage>e512</fpage><lpage>e514</lpage><pub-id pub-id-type="doi">10.1161/STROKEAHA.122.041274</pub-id><pub-id pub-id-type="medline">36367101</pub-id></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Murray</surname><given-names>GD</given-names> </name><name name-style="western"><surname>Butcher</surname><given-names>I</given-names> </name><name name-style="western"><surname>McHugh</surname><given-names>GS</given-names> </name><etal/></person-group><article-title>Multivariable prognostic analysis in traumatic brain injury: results from the IMPACT study</article-title><source>J Neurotrauma</source><year>2007</year><month>02</month><volume>24</volume><issue>2</issue><fpage>329</fpage><lpage>337</lpage><pub-id pub-id-type="doi">10.1089/neu.2006.0035</pub-id><pub-id pub-id-type="medline">17375997</pub-id></nlm-citation></ref><ref 
id="ref22"><label>22</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Berry</surname><given-names>C</given-names> </name><name name-style="western"><surname>Ley</surname><given-names>EJ</given-names> </name><name name-style="western"><surname>Bukur</surname><given-names>M</given-names> </name><etal/></person-group><article-title>Redefining hypotension in traumatic brain injury</article-title><source>Injury</source><year>2012</year><month>11</month><volume>43</volume><issue>11</issue><fpage>1833</fpage><lpage>1837</lpage><pub-id pub-id-type="doi">10.1016/j.injury.2011.08.014</pub-id><pub-id pub-id-type="medline">21939970</pub-id></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Brenner</surname><given-names>M</given-names> </name><name name-style="western"><surname>Stein</surname><given-names>DM</given-names> </name><name name-style="western"><surname>Hu</surname><given-names>PF</given-names> </name><name name-style="western"><surname>Aarabi</surname><given-names>B</given-names> </name><name name-style="western"><surname>Sheth</surname><given-names>K</given-names> </name><name name-style="western"><surname>Scalea</surname><given-names>TM</given-names> </name></person-group><article-title>Traditional systolic blood pressure targets underestimate hypotension-induced secondary brain injury</article-title><source>J Trauma Acute Care Surg</source><year>2012</year><month>05</month><volume>72</volume><issue>5</issue><fpage>1135</fpage><lpage>1139</lpage><pub-id pub-id-type="doi">10.1097/TA.0b013e31824af90b</pub-id><pub-id pub-id-type="medline">22673237</pub-id></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kow</surname><given-names>CY</given-names> </name><name 
name-style="western"><surname>Harley</surname><given-names>B</given-names> </name><name name-style="western"><surname>Li</surname><given-names>C</given-names> </name><etal/></person-group><article-title>Escalating mean arterial pressure in severe traumatic brain injury: a prospective, observational study</article-title><source>J Neurotrauma</source><year>2021</year><month>07</month><day>15</day><volume>38</volume><issue>14</issue><fpage>1995</fpage><lpage>2002</lpage><pub-id pub-id-type="doi">10.1089/neu.2020.7289</pub-id><pub-id pub-id-type="medline">33280492</pub-id></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ter Minassian</surname><given-names>A</given-names> </name><name name-style="western"><surname>Dub&#x00E9;</surname><given-names>L</given-names> </name><name name-style="western"><surname>Guilleux</surname><given-names>AM</given-names> </name><name name-style="western"><surname>Wehrmann</surname><given-names>N</given-names> </name><name name-style="western"><surname>Ursino</surname><given-names>M</given-names> </name><name name-style="western"><surname>Beydon</surname><given-names>L</given-names> </name></person-group><article-title>Changes in intracranial pressure and cerebral autoregulation in patients with severe traumatic brain injury</article-title><source>Crit Care Med</source><year>2002</year><month>07</month><volume>30</volume><issue>7</issue><fpage>1616</fpage><lpage>1622</lpage><pub-id pub-id-type="doi">10.1097/00003246-200207000-00036</pub-id><pub-id pub-id-type="medline">12130988</pub-id></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kaur</surname><given-names>P</given-names> </name><name name-style="western"><surname>Sharma</surname><given-names>S</given-names> 
</name></person-group><article-title>Recent advances in pathophysiology of traumatic brain injury</article-title><source>Curr Neuropharmacol</source><year>2018</year><month>08</month><day>21</day><volume>16</volume><issue>8</issue><fpage>1224</fpage><lpage>1238</lpage><pub-id pub-id-type="doi">10.2174/1570159X15666170613083606</pub-id></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Jin</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Lin</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Feng</surname><given-names>JF</given-names> </name><name name-style="western"><surname>Jia</surname><given-names>F</given-names> </name><name name-style="western"><surname>Gao</surname><given-names>G</given-names> </name><name name-style="western"><surname>Jiang</surname><given-names>JY</given-names> </name></person-group><article-title>Attenuation of cell death in injured cortex after post-traumatic brain injury moderate hypothermia: possible involvement of autophagy pathway</article-title><source>World Neurosurg</source><year>2015</year><month>08</month><volume>84</volume><issue>2</issue><fpage>420</fpage><lpage>430</lpage><pub-id pub-id-type="doi">10.1016/j.wneu.2015.03.039</pub-id></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Dietrich</surname><given-names>WD</given-names> </name><name name-style="western"><surname>Alonso</surname><given-names>O</given-names> </name><name name-style="western"><surname>Busto</surname><given-names>R</given-names> </name><name name-style="western"><surname>Globus</surname><given-names>MT</given-names> </name><name name-style="western"><surname>Ginsberg</surname><given-names>MD</given-names> 
</name></person-group><article-title>Post-traumatic brain hypothermia reduces histopathological damage following concussive brain injury in the rat</article-title><source>Acta Neuropathol</source><year>1994</year><volume>87</volume><issue>3</issue><fpage>250</fpage><lpage>258</lpage><pub-id pub-id-type="doi">10.1007/BF00296740</pub-id><pub-id pub-id-type="medline">8009957</pub-id></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Yokobori</surname><given-names>S</given-names> </name><name name-style="western"><surname>Gajavelli</surname><given-names>S</given-names> </name><name name-style="western"><surname>Mondello</surname><given-names>S</given-names> </name><etal/></person-group><article-title>Neuroprotective effect of preoperatively induced mild hypothermia as determined by biomarkers and histopathological estimation in a rat subdural hematoma decompression model</article-title><source>J Neurosurg</source><year>2013</year><month>02</month><volume>118</volume><issue>2</issue><fpage>370</fpage><lpage>380</lpage><pub-id pub-id-type="doi">10.3171/2012.10.JNS12725</pub-id></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>McIntyre</surname><given-names>LA</given-names> </name><name name-style="western"><surname>Fergusson</surname><given-names>DA</given-names> </name><name name-style="western"><surname>H&#x00E9;bert</surname><given-names>PC</given-names> </name><name name-style="western"><surname>Moher</surname><given-names>D</given-names> </name><name name-style="western"><surname>Hutchison</surname><given-names>JS</given-names> </name></person-group><article-title>Prolonged therapeutic hypothermia after traumatic brain injury in adults: a systematic 
review</article-title><source>JAMA</source><year>2003</year><month>06</month><day>11</day><volume>289</volume><issue>22</issue><fpage>2992</fpage><lpage>2999</lpage><pub-id pub-id-type="doi">10.1001/jama.289.22.2992</pub-id><pub-id pub-id-type="medline">12799408</pub-id></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hutchison</surname><given-names>JS</given-names> </name><name name-style="western"><surname>Ward</surname><given-names>RE</given-names> </name><name name-style="western"><surname>Lacroix</surname><given-names>J</given-names> </name><etal/></person-group><article-title>Hypothermia therapy after traumatic brain injury in children</article-title><source>N Engl J Med</source><year>2008</year><month>06</month><day>5</day><volume>358</volume><issue>23</issue><fpage>2447</fpage><lpage>2456</lpage><pub-id pub-id-type="doi">10.1056/NEJMoa0706930</pub-id></nlm-citation></ref><ref id="ref32"><label>32</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Clifton</surname><given-names>GL</given-names> </name><name name-style="western"><surname>Valadka</surname><given-names>A</given-names> </name><name name-style="western"><surname>Zygun</surname><given-names>D</given-names> </name><etal/></person-group><article-title>Very early hypothermia induction in patients with severe brain injury (the National Acute Brain Injury Study: Hypothermia II): a randomised trial</article-title><source>Lancet Neurol</source><year>2011</year><month>02</month><volume>10</volume><issue>2</issue><fpage>131</fpage><lpage>139</lpage><pub-id pub-id-type="doi">10.1016/S1474-4422(10)70300-8</pub-id></nlm-citation></ref><ref id="ref33"><label>33</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Qiu</surname><given-names>WS</given-names> 
</name><name name-style="western"><surname>Liu</surname><given-names>WG</given-names> </name><name name-style="western"><surname>Shen</surname><given-names>H</given-names> </name><etal/></person-group><article-title>Therapeutic effect of mild hypothermia on severe traumatic head injury</article-title><source>Chin J Traumatol</source><year>2005</year><month>02</month><volume>8</volume><issue>1</issue><fpage>27</fpage><lpage>32</lpage><pub-id pub-id-type="medline">15676086</pub-id></nlm-citation></ref><ref id="ref34"><label>34</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Clifton</surname><given-names>GL</given-names> </name><name name-style="western"><surname>Miller</surname><given-names>ER</given-names> </name><name name-style="western"><surname>Choi</surname><given-names>SC</given-names> </name><etal/></person-group><article-title>Lack of effect of induction of hypothermia after acute brain injury</article-title><source>N Engl J Med</source><year>2001</year><month>02</month><day>22</day><volume>344</volume><issue>8</issue><fpage>556</fpage><lpage>563</lpage><pub-id pub-id-type="doi">10.1056/NEJM200102223440803</pub-id></nlm-citation></ref><ref id="ref35"><label>35</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lewis</surname><given-names>SR</given-names> </name><name name-style="western"><surname>Evans</surname><given-names>DJ</given-names> </name><name name-style="western"><surname>Butler</surname><given-names>AR</given-names> </name><name name-style="western"><surname>Schofield-Robinson</surname><given-names>OJ</given-names> </name><name name-style="western"><surname>Alderson</surname><given-names>P</given-names> </name></person-group><article-title>Hypothermia for traumatic brain injury</article-title><source>Cochrane Database Syst 
Rev</source><year>2017</year><month>09</month><day>21</day><volume>9</volume><issue>9</issue><fpage>CD001048</fpage><pub-id pub-id-type="doi">10.1002/14651858.CD001048.pub5</pub-id><pub-id pub-id-type="medline">28933514</pub-id></nlm-citation></ref><ref id="ref36"><label>36</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Carney</surname><given-names>N</given-names> </name><name name-style="western"><surname>Totten</surname><given-names>AM</given-names> </name><name name-style="western"><surname>O&#x2019;Reilly</surname><given-names>C</given-names> </name><etal/></person-group><article-title>Guidelines for the management of severe traumatic brain injury, fourth edition</article-title><source>Neurosurgery</source><year>2017</year><month>01</month><day>1</day><volume>80</volume><issue>1</issue><fpage>6</fpage><lpage>15</lpage><pub-id pub-id-type="doi">10.1227/NEU.0000000000001432</pub-id><pub-id pub-id-type="medline">27654000</pub-id></nlm-citation></ref><ref id="ref37"><label>37</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Albrecht</surname><given-names>RF</given-names> </name><name name-style="western"><surname>Wass</surname><given-names>CT</given-names> </name><name name-style="western"><surname>Lanier</surname><given-names>WL</given-names> </name></person-group><article-title>Occurrence of potentially detrimental temperature alterations in hospitalized patients at risk for brain injury</article-title><source>Mayo Clin Proc</source><year>1998</year><month>07</month><volume>73</volume><issue>7</issue><fpage>629</fpage><lpage>635</lpage><pub-id pub-id-type="doi">10.1016/S0025-6196(11)64885-4</pub-id><pub-id pub-id-type="medline">9663190</pub-id></nlm-citation></ref><ref id="ref38"><label>38</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Birg</surname><given-names>T</given-names> </name><name name-style="western"><surname>Ortolano</surname><given-names>F</given-names> </name><name name-style="western"><surname>Wiegers</surname><given-names>EJA</given-names> </name><etal/></person-group><article-title>Brain temperature influences intracranial pressure and cerebral perfusion pressure after traumatic brain injury: a CENTER-TBI study</article-title><source>Neurocrit Care</source><year>2021</year><month>12</month><volume>35</volume><issue>3</issue><fpage>651</fpage><lpage>661</lpage><pub-id pub-id-type="doi">10.1007/s12028-021-01294-1</pub-id></nlm-citation></ref><ref id="ref39"><label>39</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Puccio</surname><given-names>AM</given-names> </name><name name-style="western"><surname>Fischer</surname><given-names>MR</given-names> </name><name name-style="western"><surname>Jankowitz</surname><given-names>BT</given-names> </name><name name-style="western"><surname>Yonas</surname><given-names>H</given-names> </name><name name-style="western"><surname>Darby</surname><given-names>JM</given-names> </name><name name-style="western"><surname>Okonkwo</surname><given-names>DO</given-names> </name></person-group><article-title>Induced normothermia attenuates intracranial hypertension and reduces fever burden after severe traumatic brain injury</article-title><source>Neurocrit Care</source><year>2009</year><volume>11</volume><issue>1</issue><fpage>82</fpage><lpage>87</lpage><pub-id pub-id-type="doi">10.1007/s12028-009-9213-0</pub-id><pub-id pub-id-type="medline">19337864</pub-id></nlm-citation></ref><ref id="ref40"><label>40</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Chesnut</surname><given-names>R</given-names> </name><name 
name-style="western"><surname>Aguilera</surname><given-names>S</given-names> </name><name name-style="western"><surname>Buki</surname><given-names>A</given-names> </name><etal/></person-group><article-title>A management algorithm for adult patients with both brain oxygen and intracranial pressure monitoring: the Seattle International Severe Traumatic Brain Injury Consensus Conference (SIBICC)</article-title><source>Intensive Care Med</source><year>2020</year><month>05</month><volume>46</volume><issue>5</issue><fpage>919</fpage><lpage>929</lpage><pub-id pub-id-type="doi">10.1007/s00134-019-05900-x</pub-id><pub-id pub-id-type="medline">31965267</pub-id></nlm-citation></ref><ref id="ref41"><label>41</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lavinio</surname><given-names>A</given-names> </name><name name-style="western"><surname>Coles</surname><given-names>JP</given-names> </name><name name-style="western"><surname>Robba</surname><given-names>C</given-names> </name><etal/></person-group><article-title>Targeted temperature control following traumatic brain injury: ESICM/NACCS best practice consensus recommendations</article-title><source>Crit Care</source><year>2024</year><month>05</month><day>20</day><volume>28</volume><issue>1</issue><fpage>170</fpage><pub-id pub-id-type="doi">10.1186/s13054-024-04951-x</pub-id><pub-id pub-id-type="medline">38769582</pub-id></nlm-citation></ref><ref id="ref42"><label>42</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Maas</surname><given-names>AIR</given-names> </name><name name-style="western"><surname>Menon</surname><given-names>DK</given-names> </name><name name-style="western"><surname>Manley</surname><given-names>GT</given-names> </name><etal/></person-group><article-title>Traumatic brain injury: progress and challenges in prevention, clinical care, and 
research</article-title><source>Lancet Neurol</source><year>2022</year><month>11</month><volume>21</volume><issue>11</issue><fpage>1004</fpage><lpage>1060</lpage><pub-id pub-id-type="doi">10.1016/S1474-4422(22)00309-X</pub-id></nlm-citation></ref><ref id="ref43"><label>43</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hawryluk</surname><given-names>GWJ</given-names> </name><name name-style="western"><surname>Ghajar</surname><given-names>J</given-names> </name></person-group><article-title>Evolution and impact of the Brain Trauma Foundation guidelines</article-title><source>Neurosurgery</source><year>2021</year><month>12</month><volume>89</volume><issue>6</issue><fpage>1148</fpage><lpage>1156</lpage><pub-id pub-id-type="doi">10.1093/neuros/nyab357</pub-id></nlm-citation></ref><ref id="ref44"><label>44</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Volovici</surname><given-names>V</given-names> </name><name name-style="western"><surname>Steyerberg</surname><given-names>EW</given-names> </name><name name-style="western"><surname>Cnossen</surname><given-names>MC</given-names> </name><etal/></person-group><article-title>Evolution of evidence and guideline recommendations for the medical management of severe traumatic brain injury</article-title><source>J Neurotrauma</source><year>2019</year><month>11</month><day>15</day><volume>36</volume><issue>22</issue><fpage>3183</fpage><lpage>3189</lpage><pub-id pub-id-type="doi">10.1089/neu.2019.6474</pub-id><pub-id pub-id-type="medline">31280663</pub-id></nlm-citation></ref><ref id="ref45"><label>45</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Cruz Navarro</surname><given-names>J</given-names> </name><name name-style="western"><surname>Ponce Mejia</surname><given-names>LL</given-names> </name><name 
name-style="western"><surname>Robertson</surname><given-names>C</given-names> </name></person-group><article-title>A precision medicine agenda in traumatic brain injury</article-title><source>Front Pharmacol</source><year>2022</year><volume>13</volume><fpage>713100</fpage><pub-id pub-id-type="doi">10.3389/fphar.2022.713100</pub-id><pub-id pub-id-type="medline">35370671</pub-id></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>Markov decision process description.</p><media xlink:href="jmir_v27i1e63847_app1.docx" xlink:title="DOCX File, 19 KB"/></supplementary-material><supplementary-material id="app2"><label>Multimedia Appendix 2</label><p>WD3QNE algorithm description. WD3QNE: weighted dueling double deep Q-network with embedded human expertise.</p><media xlink:href="jmir_v27i1e63847_app2.docx" xlink:title="DOCX File, 185 KB"/></supplementary-material><supplementary-material id="app3"><label>Multimedia Appendix 3</label><p>TBI_algorithm code. TBI: traumatic brain injury.</p><media xlink:href="jmir_v27i1e63847_app3.zip" xlink:title="ZIP File, 48 KB"/></supplementary-material></app-group></back></article>