<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMIR</journal-id>
      <journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id>
      <journal-title>Journal of Medical Internet Research</journal-title>
      <issn pub-type="epub">1438-8871</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v24i1e28659</article-id>
      <article-id pub-id-type="pmid">35044311</article-id>
      <article-id pub-id-type="doi">10.2196/28659</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>A Clinical Decision Support System for Sleep Staging Tasks With Explanations From Artificial Intelligence: User-Centered Design and Evaluation Study</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Mavragani</surname>
            <given-names>Amaryllis</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Lyell</surname>
            <given-names>David</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Grepo</surname>
            <given-names>Lorelie</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Hwang</surname>
            <given-names>Jeonghwan</given-names>
          </name>
          <degrees>BSc, MSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-4399-1556</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Lee</surname>
            <given-names>Taeheon</given-names>
          </name>
          <degrees>BSc, MSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-3777-1872</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Lee</surname>
            <given-names>Honggu</given-names>
          </name>
          <degrees>BSc, MSc, PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-6174-3030</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Byun</surname>
            <given-names>Seonjeong</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <address>
            <institution>Department of Neuropsychiatry</institution>
            <institution>Uijeongbu St Mary's Hospital, College of Medicine</institution>
            <institution>The Catholic University of Korea</institution>
            <addr-line>271, Cheonbo-ro</addr-line>
            <addr-line>Uijeongbu-si, 11765</addr-line>
            <country>Republic of Korea</country>
            <phone>82 31 820 3946</phone>
            <email>sunjung.byun@gmail.com</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-0864-9835</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Looxid Labs</institution>
        <addr-line>Seoul</addr-line>
        <country>Republic of Korea</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Department of Neuropsychiatry</institution>
        <institution>Uijeongbu St Mary's Hospital, College of Medicine</institution>
        <institution>The Catholic University of Korea</institution>
        <addr-line>Uijeongbu-si</addr-line>
        <country>Republic of Korea</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Seonjeong Byun <email>sunjung.byun@gmail.com</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>1</month>
        <year>2022</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>19</day>
        <month>1</month>
        <year>2022</year>
      </pub-date>
      <volume>24</volume>
      <issue>1</issue>
      <elocation-id>e28659</elocation-id>
      <history>
        <date date-type="received">
          <day>9</day>
          <month>3</month>
          <year>2021</year>
        </date>
        <date date-type="rev-request">
          <day>5</day>
          <month>5</month>
          <year>2021</year>
        </date>
        <date date-type="rev-recd">
          <day>30</day>
          <month>6</month>
          <year>2021</year>
        </date>
        <date date-type="accepted">
          <day>1</day>
          <month>12</month>
          <year>2021</year>
        </date>
      </history>
      <copyright-statement>©Jeonghwan Hwang, Taeheon Lee, Honggu Lee, Seonjeong Byun. Originally published in the Journal of Medical Internet Research (https://www.jmir.org), 19.01.2022.</copyright-statement>
      <copyright-year>2022</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research, is properly cited. The complete bibliographic information, a link to the original publication on https://www.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://www.jmir.org/2022/1/e28659" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Despite the unprecedented performance of deep learning algorithms in clinical domains, full reviews of algorithmic predictions by human experts remain mandatory. Under these circumstances, artificial intelligence (AI) models are primarily designed as clinical decision support systems (CDSSs). However, from the perspective of clinical practitioners, the lack of clinical interpretability and user-centered interfaces hinders the adoption of these AI systems in practice.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>This study aims to develop an AI-based CDSS for assisting polysomnographic technicians in reviewing AI-predicted sleep staging results. This study proposed and evaluated a CDSS that provides clinically sound explanations for AI predictions in a user-centered manner.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>Our study is based on a user-centered design framework for developing explanations in a CDSS that identifies why explanations are needed, what information should be contained in explanations, and how explanations can be provided in the CDSS. We conducted user interviews, user observation sessions, and an iterative design process to identify three key aspects for designing explanations in the CDSS. After constructing the CDSS, the tool was evaluated to investigate how the CDSS explanations helped technicians. We measured the accuracy of sleep staging and interrater reliability with macro-F1 and Cohen κ scores to assess quantitative improvements after our tool was adopted. We assessed qualitative improvements through participant interviews that established how participants perceived and used the tool.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>The user study revealed that technicians desire explanations that are relevant to key electroencephalogram (EEG) patterns for sleep staging when assessing the correctness of AI predictions. Here, technicians wanted explanations that could be used to evaluate whether the AI models properly locate and use these patterns during prediction. On the basis of this, information that is closely related to sleep EEG patterns was formulated for the AI models. In the iterative design phase, we developed a different visualization strategy for each pattern based on how technicians interpreted the EEG recordings with these patterns during their workflows. Our evaluation study on 9 polysomnographic technicians quantitatively and qualitatively investigated the helpfulness of the tool. For technicians with &#60;5 years of work experience, their quantitative sleep staging performance improved significantly from 56.75 to 60.59 with a <italic>P</italic> value of .05. Qualitatively, participants reported that the information provided effectively supported them, and they could develop notable adoption strategies for the tool.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>Our findings indicate that formulating clinical explanations for automated predictions using the information in the AI with a user-centered design process is an effective strategy for developing a CDSS for sleep staging.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>sleep staging</kwd>
        <kwd>clinical decision support</kwd>
        <kwd>user-centered design</kwd>
        <kwd>medical artificial intelligence</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <sec>
        <title>Background</title>
        <p>Polysomnography is a systematic process for collecting physiological parameters during sleep and is a diagnostic tool for evaluating various sleep disorders. Physiological recordings obtained from an electroencephalogram (EEG), electrooculogram (EOG), and electromyogram (EMG) were inspected by polysomnographic technicians to obtain important sleep parameters. Sleep staging is the process of identifying periodic changes in sleep stages. Typically, sleep stages are identified for every 30-second signal or epoch. On the basis of the American Academy of Sleep Medicine criteria, the wake stage; 3 non–rapid eye movement (REM) stages, namely N1, N2, and N3; and the REM stage were identified from polysomnographic recordings [<xref ref-type="bibr" rid="ref1">1</xref>]. Sleep staging is an essential task in sleep medicine, as sleep patterns contain critical information for analyzing overnight polysomnography. To be specific, crucial sleep parameters, such as the distribution of sleep stages, were extracted from the sleep staging results. For example, the N1 stage, which is difficult to differentiate from the wake stages, is used to calculate the time to sleep onset and total sleep time parameters. The detection of REM stages affects the calculation of REM latency after sleep, which is another important sleep parameter. Furthermore, the physiological characteristics associated with each sleep stage have been investigated to diagnose several sleep disorders, such as obstructive sleep apnea, narcolepsy, and REM sleep behavior disorder [<xref ref-type="bibr" rid="ref2">2</xref>,<xref ref-type="bibr" rid="ref3">3</xref>]. However, in polysomnography, sleep staging is a time-consuming and costly process because every epoch in an overnight recording must be manually inspected. Several algorithms have been introduced to automate this time-consuming and costly task [<xref ref-type="bibr" rid="ref4">4</xref>-<xref ref-type="bibr" rid="ref6">6</xref>].</p>
      </sec>
      <sec>
        <title>Artificial Intelligence–Based Clinical Decision Support Systems for Sleep Staging</title>
        <p>Advances in deep learning techniques have led to the development of clinical Artificial Intelligence (AI) systems with diagnostic performance comparable with that of human clinicians [<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref7">7</xref>-<xref ref-type="bibr" rid="ref9">9</xref>]. These models have been introduced to automate time-consuming diagnoses and annotation procedures in clinical fields. However, the full automation of diagnostic processes, where algorithmic counterparts completely replace human clinicians, is presently not available owing to several challenges: the reliability of model predictions [<xref ref-type="bibr" rid="ref10">10</xref>], clinical soundness of model behaviors [<xref ref-type="bibr" rid="ref11">11</xref>], and social consensus on the replacement [<xref ref-type="bibr" rid="ref12">12</xref>]. Similarly, in sleep medicine, several studies have introduced AI algorithms to automate time-consuming sleep staging tasks, but manual reviews of the results after automated prediction remain mandatory [<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref14">14</xref>]. Under these circumstances, systems to assist polysomnographic technicians during the review process are in demand. For example, prior work in human–AI interaction conceptualized a framework in which ambiguous portions in polysomnographic recordings are selectively prioritized for manual inspection [<xref ref-type="bibr" rid="ref15">15</xref>].</p>
        <p>Despite an increasing number of deep learning studies for sleep staging [<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref5">5</xref>], implementing an adoptable clinical decision support system (CDSS) for clinical practice remains a challenging task. First, regarding clinical knowledge, most deep learning–based systems lack explainable factors, but clinical staff members require clinically sound systems [<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref16">16</xref>]. Thus, the CDSS should provide users with the necessary explanations. Second, the user interface of the AI system should be practical in clinical environments, where the time and resources of clinicians are constrained [<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref17">17</xref>]. Therefore, a tool design that promotes readability and accessibility of the AI model from the viewpoint of clinical practitioners is indispensable for integrating AI-based decision-making into the workflow of human technicians [<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref18">18</xref>]. The development of such CDSSs is crucial because these tools could alleviate these time-consuming and costly clinical tasks. Furthermore, proper algorithmic assistance can enhance the performance of clinical practitioners [<xref ref-type="bibr" rid="ref19">19</xref>].</p>
      </sec>
      <sec>
        <title>Study Objectives</title>
        <p>In this study, we introduce an AI-based CDSS for assisting polysomnographic technicians when reviewing the AI-generated sleep staging results. Our objective is to correctly understand the information required from the CDSS and to develop the system in a user-centered manner. Through an extensive user study, we determined the features desired in a sleep staging AI system that could successfully support sleep technicians. We formulated the development process of a tool to assist clinical practitioners effectively.</p>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Study Design</title>
        <p>This study aimed to understand what information should be provided to assist sleep technicians in collaborating with AI-based CDSS and to implement this system practically using a user-centered approach. Recent studies for designing explanations in CDSS propose frameworks that identify three key components from the perspectives of users: <italic>why</italic> information from CDSS is desired for a task, <italic>what</italic> content should be included in the explanation, and <italic>how</italic> explanations should be presented to users [<xref ref-type="bibr" rid="ref20">20</xref>,<xref ref-type="bibr" rid="ref21">21</xref>].</p>
        <p>To define why users need explanations from the CDSS, the context within which users request explanations must be understood first. This question relates to the needs of the users and the purpose of the explanations. The perspective of users should determine the explanatory objective concerning the information that should be provided. A possible set of information that can be considered from this phase includes explanations for the input data, explanations related to the domain knowledge used in the task, causal information on how the system generates an output, and how results change with changes in input data [<xref ref-type="bibr" rid="ref21">21</xref>,<xref ref-type="bibr" rid="ref22">22</xref>]. Finally, several design factors, such as the units and format used for explanations, are considered when determining how information should be provided.</p>
        <p>To design a CDSS within this framework, our development process included three phases: (1) interviews with polysomnographic technicians to identify why users might desire explanations from the CDSS when adopting AI-based sleep scoring systems, (2) user observations of how polysomnographic technicians score sleep stages from EEG recordings to determine the information that could help them, and (3) an iterative design process to construct a user-friendly CDSS interface that addresses the formulation of explanations in the system. After development, the polysomnographic technicians performed quantitative and qualitative evaluations of the system. In this section, we describe the objectives of each phase and explain how we conducted each phase (<xref rid="figure1" ref-type="fig">Figure 1</xref>).</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Overall development process. AI: artificial intelligence.</p>
          </caption>
          <graphic xlink:href="jmir_v24i1e28659_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Participants</title>
        <p>Polysomnographic technicians with expertise in sleep staging were recruited for this study. Only technicians with a national license for medical laboratory technologists who were eligible to conduct polysomnography scoring were considered. To recruit participants with expertise in sleep scoring, we restricted their participation to those with experience in polysomnography scoring. We recruited 10 technicians to participate in the user interviews during the first phase and subsequent evaluation studies. We set the number of participants to 10, following previous studies on CDSSs, in which the number of participants was between 6 and 12 [<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref23">23</xref>]. Among the technicians, we aimed to recruit 1 technician who could deeply engage in the development process by participating in the user observation and iterative design processes, which required regular meetings. We recruited technicians from secondary and tertiary hospitals rather than primary hospitals. Participants were recruited through emails sent to the polysomnographic technician community.</p>
        <p>We recruited participants and divided them into two groups, <italic>novice</italic> technicians with &#60;5 years of experience and <italic>senior</italic> technicians with &#8805;5 years of experience, to evaluate whether there were any differences in the helpfulness of the CDSS based on the amount of experience. On the basis of the Rasmussen skill-, rule-, and knowledge-based behavior model [<xref ref-type="bibr" rid="ref24">24</xref>], we assumed that senior technicians would score stages subconsciously compared with novice technicians who consciously process the EEG characteristics. Here, we expected that novice technicians would more extensively refer to the provided explanation than senior technicians because novice technicians may find it difficult to quickly locate important EEG patterns. Thus, it was thought meaningful to investigate how our explanations affected technicians based on their skills.</p>
      </sec>
      <sec>
        <title>Development Procedure</title>
        <sec>
          <title>User Interview: Why Explanation Is Desired</title>
          <p>We conducted user interviews with polysomnographic technicians to investigate why technicians would need explanations from the CDSS when AI-based support systems were adopted for sleep staging. During the interview, we first presented several questions regarding user needs during manual sleep staging and the perceptions of technicians regarding the utility of previous sleep staging AI tools. The technicians were asked whether they were using the automated sleep staging programs. Furthermore, the reasons for not adopting such automated sleep staging programs were investigated. Upon further investigation, we established the context in which explanations from AI were desired when reviewing automated sleep staging results. A user study was conducted using structured interviews with the sample questions listed in <xref ref-type="boxed-text" rid="box1">Textbox 1</xref>.</p>
          <boxed-text id="box1" position="float">
            <title>Examples of interview questions in the user study.</title>
            <p>
              <bold>Topic and question statement</bold>
            </p>
            <p>
              <bold>User needs during manual sleep staging</bold>
            </p>
            <list list-type="order">
              <list-item>
                <p>How much time do you spend on a sleep staging task when performing polysomnography?</p>
              </list-item>
              <list-item>
                <p>For sleep staging tasks, on which features of electroencephalogram recordings do you mainly focus?</p>
              </list-item>
              <list-item>
                <p>Do you feel any need for assistance during sleep staging?</p>
              </list-item>
            </list>
            <p>
              <bold>Utility of sleep staging artificial intelligence (AI) tools</bold>
            </p>
            <list list-type="order">
              <list-item>
                <p>There are several AI programs that automate sleep staging tasks; are you adopting them in your workflow? If not, what are the problems associated with these programs?</p>
              </list-item>
              <list-item>
                <p>In which processes do you need AI programs to assist your sleep staging tasks?</p>
              </list-item>
              <list-item>
                <p>Assuming that there is an AI program that automates sleep staging tasks and sleep technicians only need to review its scorings, in which context are explanations desired for an efficient review process?</p>
              </list-item>
            </list>
          </boxed-text>
        </sec>
        <sec>
          <title>User Observation: What Information Should Be Contained in Explanations</title>
          <p>A user observation study was performed to understand the sleep staging conventions of clinical practitioners. From the observed sleep staging conventions, we aimed to construct a list of EEG characteristics to which technicians refer. During this study, hour-long weekly meetings were held over a month in which a participating technician scored EEG epochs in a think-aloud protocol. The technician was requested to verbally express how the information in the EEG recordings during sleep scoring was processed. Afterward, the technician reviewed the scoring with detailed explanations of the reasons for scoring the epochs with the annotated stages. The objective of these observation sessions was to formulate what information could assist technicians in reviewing predictions from AI algorithms. The observations were made based on characteristic EEG patterns such as sleep spindles, k-complexes, and frequency waves listed in the sleep manual [<xref ref-type="bibr" rid="ref1">1</xref>]. We investigated how the listed EEG characteristics were inspected in practice. Subsequently, we grouped the EEG features into typical explanations that our CDSS could provide.</p>
        </sec>
        <sec>
          <title>Iterative Design Process: How Explanations Can Be Presented</title>
          <p>We conducted an iterative design process with a technician to identify how explanations should be presented to CDSS users. For 2 months, we held weekly 2-hour meetings.</p>
          <p>The Template-Guided Neural Networks for Robust and Interpretable Sleep Stage Identification from EEG Recordings (TRIER) was selected as the AI algorithm for generating explanations. It is a convolutional neural network architecture used to process single-channel EEG data for sleep staging, and was proposed to extract clinically meaningful EEG wave shapes. This study demonstrated the possibility that features in the convolutional filters could be related to important EEG characteristics such as sleep spindles and k-complexes, with a sleep staging performance comparable with human raters with macro-F1 scores of 0.7-0.8 on public sleep data sets. We considered three components in the TRIER, namely convolutional filters, saliency values, and intermediate activation, as sources of information for generating explanations. These three components have been widely used in interpreting neural network operations in previous machine learning studies [<xref ref-type="bibr" rid="ref25">25</xref>-<xref ref-type="bibr" rid="ref30">30</xref>]. Detailed technical descriptions of these components are provided in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref29">29</xref>-<xref ref-type="bibr" rid="ref32">32</xref>].</p>
          <p>During the iterations, we aimed to investigate whether the information contained in the above components could provide the desired information obtained from the user observation study. In these sessions, the technician inspected the features obtained by the neural network components and expressed an opinion on whether they could provide sufficient explanation for the task. Information from the components was refined based on the feedback. Subsequently, we chose the exact component for generating explanations from the neural network components. However, because the information in neural networks is numerical, adequate visualization is required to enhance the user-friendliness of the explanations. Therefore, we iteratively collected feedback on the representation format of the explanations during the later sessions. The technician tested the prototype versions of the proposed tool and provided feedback in terms of their intuitiveness and helpfulness. Consequently, visualization strategies were constructed for the explanations and overall interfaces.</p>
        </sec>
      </sec>
      <sec>
        <title>Evaluation Study</title>
        <sec>
          <title>Data Set Preparation</title>
          <p>During the evaluation, technicians scored the sleep stages on sleep recordings from a public sleep EEG data set, the ISRUC-Sleep Dataset [<xref ref-type="bibr" rid="ref33">33</xref>]. These data contain polysomnographic recordings obtained from 100 subjects with evidence of sleep disorders. This data set was collected from the Sleep Medicine Centre of the Hospital of Coimbra University. We adopted the public data set for sleep staging to calculate sleep staging performance based on the ground-truth labels provided in the data set. The characteristics of the data sets are summarized in <xref ref-type="table" rid="table1">Table 1</xref>.</p>
          <p>The data were divided into a training set (80 participants), validation set (10 participants), and test set (10 participants). Only data samples from the training data set were used for training the deep learning models. We used the validation data set to select the model to be used for constructing the CDSS. The model with the best performance scores for the validation set was selected. The experimental results and corresponding findings were drawn exclusively from the test data set, which means that to avoid information leakage issues that may affect model accuracy, the data samples used for training the model were not used during the evaluation study.</p>
          <p>To construct the data set for the evaluation study, we randomly extracted 15-minute EEG segments from the EEG recordings in the test data set. EEG segments with no changes in sleep stages were excluded from the selected segments. We evaluated the sleep scoring performance with 15-minute segments rather than whole-night polysomnography to evaluate the helpfulness of the tool effectively. Considering that technicians often skim through recordings and pay attention to EEG epochs with stage changes, the effectiveness of the system might not be revealed or hindered by the back-and-forth temporal relations between the sleep stages. This evaluation configuration was also adopted in a previous CDSS study for sleep staging [<xref ref-type="bibr" rid="ref15">15</xref>]. In addition to the test set of 15-minute segments, we constructed a test data set composed of disconnected single epochs of EEG recordings to function as a stress test in which technicians must interpret the characteristics of an EEG epoch only from the EEG epoch without temporal relations derived from previous epochs. In these single-epoch test sets, because there are no previous or following epochs to provide information about the current epoch, the technicians can no longer rely on the scoring results from the previous epochs. The intention here was to clearly reveal the effectiveness of the explanations of the EEG characteristics.</p>
          <p>In summary, our test data set consisted of two EEG settings: <italic>a set of 15-minute</italic> EEG <italic>segments</italic> and <italic>a set of single-epoch</italic> EEG <italic>segments</italic>. All the participants scored the same set of EEG recordings. A figure explaining our data setting is provided in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p>
          <table-wrap position="float" id="table1">
            <label>Table 1</label>
            <caption>
              <p>Summary characteristics of the ISRUC-Sleep Dataset<sup>a</sup> (N=100).</p>
            </caption>
            <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
              <col width="30"/>
              <col width="470"/>
              <col width="500"/>
              <thead>
                <tr valign="top">
                  <td colspan="2">Characteristics</td>
                  <td>ISRUC-Sleep Dataset</td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td colspan="3">
                    <bold>Gender, n (%)</bold>
                  </td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Male</td>
                  <td>55 (55)</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Female</td>
                  <td>45 (45)</td>
                </tr>
                <tr valign="top">
                  <td colspan="2">Age (years), mean (SD)</td>
                  <td>51 (16)</td>
                </tr>
              </tbody>
            </table>
            <table-wrap-foot>
              <fn id="table1fn1">
                <p><sup>a</sup>ISRUC-Sleep Dataset was scored based on American Academy of Sleep Medicine Rules.</p>
              </fn>
            </table-wrap-foot>
          </table-wrap>
        </sec>
        <sec>
          <title>Experimental Setting</title>
          <p>During the experiments, we compared sleep staging performance under 2 different settings. The first was sleep scoring using our CDSS against the baseline AI, where technicians scored stages with AI systems that included only AI predictions provided without any explanation. The second was sleep scoring using our CDSS versus a conventional setting, where technicians need to score each epoch without the predictions by AI. We configured the baseline AI and conventional settings to compare sleep staging settings for our CDSS.</p>
          <p>To compare the sleep staging performance under different scoring settings, the technicians had to score each EEG epoch twice as follows: once each with our CDSS and the comparison setting. This was a fair comparison setting to evaluate the efficacy of the system because the characteristics of EEG segments affect sleep staging results significantly. Previous CDSS studies also employed this scoring setting to compare 2 different sleep staging support systems [<xref ref-type="bibr" rid="ref15">15</xref>]. We divided the test data set into 2 groups and used the first to compare our CDSS with the baseline AI system. A different portion of the test data set was used to compare our CDSS with the conventional sleep staging setting. We randomly permuted the order of the EEG segments and the staging settings. Furthermore, there was a washout period before the second reading of the EEG to avoid the memorization effect.</p>
        </sec>
        <sec>
          <title>Quantitative Evaluation</title>
          <p>On the basis of the scoring results obtained from the experiments, we evaluated 2 important performance aspects for assessing sleep staging results. First, we considered the accuracy with which the technicians scored the sleep stages under different sleep staging settings. Studies on previous CDSSs have witnessed enhancements in diagnostic accuracy when using the developed CDSS [<xref ref-type="bibr" rid="ref34">34</xref>-<xref ref-type="bibr" rid="ref37">37</xref>]. Similarly, we investigated how explanations from our system affect the accuracy of sleep staging. To estimate the classification performance after reviewing the AI predictions, the <italic>macro-F1 score</italic>, which was adopted in previous studies for evaluating sleep staging performance, was used as a performance metric [<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref6">6</xref>]. We calculated the metric using the sleep stage labels provided in the public data set as the ground-truth sleep stages. The macro-F1 scores were calculated for each 15-minute EEG segment and a portion of single-epoch EEG recordings.</p>
          <p>Second, we evaluated whether interrater reliability was improved by adopting our CDSS. Interrater reliability between polysomnography technicians has been a critical issue in sleep staging because of the variability in interpreting polysomnography recordings among technicians [<xref ref-type="bibr" rid="ref38">38</xref>]. Following previous work in sleep medicine, which demonstrated that an adequate information system could improve interrater reliability [<xref ref-type="bibr" rid="ref19">19</xref>], we investigated whether the information from our CDSS could enhance this property. With this objective, interrater reliability was measured using the <italic>Cohen κ score</italic> [<xref ref-type="bibr" rid="ref39">39</xref>]. Given the sleep staging results for a 15-minute EEG segment, we calculated the Cohen κ score for every possible pairing of technicians under the same sleep staging setting.</p>
          <p>In addition to the above metrics, we also evaluated whether participants could critically assess the accuracy of the model prediction in our system. We calculated the <italic>correction rates of the predictions for incorrectly classified epochs</italic>. Here, we measured the number of incorrectly predicted epochs revised by technicians and incorrectly predicted epochs revised to correct stages. We assumed that for incorrectly predicted epochs, the AI might generate erroneous explanations. Thus, it would be easier for participants to detect incorrectly predicted samples. To evaluate this aspect, we intentionally provided EEG epochs with incorrect AI predictions during the evaluation study.</p>
        </sec>
        <sec>
          <title>Qualitative Evaluation</title>
          <p>To investigate the extent to which the developed system supported polysomnographic technicians, we conducted semistructured postevaluation interviews. During the survey, we asked questions on a wide range of topics, such as the helpfulness of the information and how the participants adapted to the system. User trust in a system is an important aspect in designing AI-based CDSSs [<xref ref-type="bibr" rid="ref16">16</xref>,<xref ref-type="bibr" rid="ref40">40</xref>]. Thus, questions regarding user trust in the developed system were included in the postevaluation interviews. Questions regarding <italic>how information from the system was used in the sleep staging process</italic> were asked during the interviews to reveal notable adoption strategies. The sample interview questions are presented in <xref ref-type="boxed-text" rid="box2">Textbox 2</xref>.</p>
          <boxed-text id="box2" position="float">
            <title>Examples of interview questions in the qualitative evaluation.</title>
            <p>
              <bold>Topic and question statement</bold>
            </p>
            <p>
              <bold>User experience of the tool</bold>
            </p>
            <list list-type="order">
              <list-item>
                <p>Were the automated predictions and explanations provided in the clinical decision support systems helpful during the experiment? If not, which aspects were unhelpful?</p>
              </list-item>
              <list-item>
                <p>How did you perceive the provided explanations when the automated predictions agreed or disagreed with your decisions? Did it affect your trust in the system?</p>
              </list-item>
              <list-item>
                <p>Did the explanations correspond well to your perception of the important waveform patterns?</p>
              </list-item>
            </list>
            <p>
              <bold>Adoption strategy for the tool</bold>
            </p>
            <list list-type="order">
              <list-item>
                <p>How did you use each explanation strategy during the experiment?</p>
              </list-item>
              <list-item>
                <p>Was there any notable strategy for adopting the explanations rather than merely accepting the information in the explanations?</p>
              </list-item>
            </list>
          </boxed-text>
        </sec>
        <sec>
          <title>Statistical Analysis</title>
          <p>As mentioned in the previous section, each EEG epoch was read twice under 2 different settings as follows: once with our CDSS and once with comparison methods, the AI system without explanations, or the conventional staging setting without AI predictions. A statistical comparison was conducted to investigate whether the sleep staging performance was enhanced by adopting our CDSS compared with the comparison settings. Rather than comparing the distribution of the scores, we performed a paired comparison analysis in which we compared 2 sleep scoring performances on the same EEG segments under 2 different score settings. As scoring results could be affected by the complexities and characteristics of particular EEG epochs, it is critical to control these variabilities when assessing the significance of each performance. Furthermore, to exclude variability arising from interrater differences and only consider enhancements in performance by adopting our CDSS, we exclusively performed within-subject analysis for the macro-F1 scores.</p>
          <p>The Wilcoxon signed-rank test, a nonparametric statistical test for a set of matched samples [<xref ref-type="bibr" rid="ref41">41</xref>], was used to estimate the significance of the improvements by adopting the proposed test. For every participant, the data pairs were configured as follows: the macro-F1 and Cohen κ scores from the baseline or usual sleep staging setting (<italic>μ<sub>1</sub> κ<sub>1</sub></italic>) and the classification results when adopting our CDSS (<italic>μ<sub>2</sub> κ<sub>2</sub></italic>). For macro-F1 scores, for performance pairs from the same technician, there could be a clustering effect. Thus, we used the Wilcoxon signed-rank test for clustered data, which can account for clustering effects [<xref ref-type="bibr" rid="ref42">42</xref>-<xref ref-type="bibr" rid="ref44">44</xref>]. This test aimed to reveal whether performance was significantly enhanced by pairwise comparison when controlling for the variance arising from the interrater characteristics and the differences in EEG epochs. The significance of the results is reported in terms of <italic>P</italic> values. We set the significance threshold at .05. All statistical and significance tests were performed using Python 3.6. We calculated the <italic>P</italic> values, sample sizes (n), <italic>z</italic> statistics, and effect sizes (<italic>r</italic>) using the Wilcoxon signed-rank test [<xref ref-type="bibr" rid="ref45">45</xref>].</p>
        </sec>
      </sec>
      <sec>
        <title>Ethical Considerations</title>
        <p>The study was approved by the institutional review board of the Uijeongbu St Mary’s Hospital (IRB number UC20ZADI0137), which waived the requirement for informed consent owing to the nature of the study. All EEG recordings used in this study were acquired from public data sets. All data were anonymized to ensure confidentiality.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Participants Characteristics</title>
        <p>In total, 10 polysomnographic technicians were recruited from 3 different affiliations: 2 tertiary hospitals and 1 secondary hospital. A total of 10% (1/10) of the technicians participated in the user interview, user observation sessions, and an iterative design process. We refer to this participant as technician A throughout the <italic>Results</italic> section. The other 90% (9/10) of the technicians participated in user interviews and evaluation studies. Among the 10 participants, 40% (4/10) were novice technicians with &#60;5 years of experience. A total of 60% (6/10) were senior technicians with &#62;5 years of experience. Technician A, who participated in the tool design process, was excluded from the evaluation study to avoid bias in favor of our CDSS. The participant characteristics are summarized in <xref ref-type="table" rid="table2">Table 2</xref>.</p>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Participant characteristics.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="390"/>
            <col width="360"/>
            <col width="220"/>
            <thead>
              <tr valign="top">
                <td colspan="2">Demographics</td>
                <td>Novice technicians (n=4)</td>
                <td>Senior technicians (n=6)</td>
              </tr>
            </thead>
            <tbody>
              <tr>
                <td colspan="2">Experience (years), mean (SD)</td>
                <td>1.75 (1.3)</td>
                <td>12.5 (4.7)</td>
              </tr>
              <tr valign="top">
                <td colspan="4">
                  <bold>Affiliations, n (%)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td/>
                <td>Secondary hospital</td>
                <td>2 (50)</td>
                <td>1 (17)</td>
              </tr>
              <tr valign="top">
                <td/>
                <td>Tertiary hospital</td>
                <td>2 (50)</td>
                <td>4 (83)</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
      <sec>
        <title>User Interview: Why Explanation Is Desired</title>
        <sec>
          <title>Reasons Technicians Did Not Use Automated Scoring Tools</title>
          <p>In total, 20% (2/10) of the participants had no experience of using automatic sleep scoring programs; the other participants preferred not to refer to the automated sleep staging results during sleep staging. The technicians answered that even when predictions were automatically recommended by the software, they removed the automated predictions and scored all the epochs themselves.</p>
          <p>In addition to the inaccuracies of algorithms, 50% (5/10) of the participants pointed out that <italic>a lack of explanation</italic> was the main barrier to adopting AI. One technician stated that, “The tools I have experienced do not provide any explanations for predictions, and I need to score every epoch all by myself again when reviewing the predictions.” Participants further called for the <italic>clinical soundness of their explanations</italic>. Another technician answered as follows:</p>
          <disp-quote>
            <p>There certainly exist clinical features to focus on for sleep staging. Even if automatic programs provided some sort of explanation, we need to check whether clinically appropriate EEG features, such as sleep spindles or amplitudes of alpha waves, are used in the algorithms.</p>
          </disp-quote>
          <p>These assertions reflect important considerations regarding explanations and the clinical soundness of algorithm procedures when designing a CDSS [<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref16">16</xref>,<xref ref-type="bibr" rid="ref46">46</xref>].</p>
        </sec>
        <sec>
          <title>The Context in Which Explanations Will Be Used</title>
          <p>As stated in the subsection above, technicians requested that AI programs should provide clinically sound explanations for predictions, as reviewing the correctness of AI predictions without this information is no different from the manual annotation of sleep stages from scratch. Participants were requested to suggest desirable AI adoption scenarios during the interviews.</p>
          <p>In total, 80% (8/10) of the technicians wanted <italic>clinically sound explanations of the predictions</italic>. This is relevant to correct EEG patterns that are important for scoring sleep stages, where users can easily assess the correctness of the reasoning from the AI model based on the conventional manuals for the clinical task:</p>
          <disp-quote>
            <p>Some automatic programs seem to use procedures that differ from the widely adopted conventions shared among sleep technicians. I think information from AI should adhere to the procedures that we were trained with to make it easier for us to assess the rationale for the explanations.</p>
          </disp-quote>
          <p>Another technician said as follows:</p>
          <disp-quote>
            <p>When reviewing the AI predictions, I need grounds that convince me. As we are trained to stage based on standard manuals, explanations from AI should be closely related to these processes.</p>
          </disp-quote>
          <p>This point is especially critical in the clinical domain, where predefined sets of rules exist [<xref ref-type="bibr" rid="ref10">10</xref>].</p>
          <p>To summarize the trend of the interview answers, the technicians wanted explanations to validate the correctness of the AI predictions based on their clinical knowledge of sleep staging.</p>
        </sec>
      </sec>
      <sec>
        <title>User Observation: What Information Should Be Contained in Explanations</title>
        <p>By observing technician A for 1 month, we obtained an understanding of how technicians interpret EEG signals during sleep staging. Using the clinical context proposed in the manual [<xref ref-type="bibr" rid="ref1">1</xref>], we categorized EEG patterns based on how the technician processed the information in the EEG recordings. On the basis of how they processed each EEG feature, we created a list of explanation types that can be provided in the CDSS. The candidate explanation-type categories are listed in <xref ref-type="boxed-text" rid="box3">Textbox 3</xref>.</p>
        <boxed-text id="box3" position="float">
          <title>Explanation type to be provided in the clinical decision support systems.</title>
          <p>
            <bold>Explanation type 1: occurrence of signals</bold>
          </p>
          <p>For some patterns in electroencephalogram recordings, their presence is a clear indicator of certain sleep stages. For example, the occurrence of <italic>sleep spindles</italic> and <italic>k-complexes</italic> is strongly correlated to non–rapid eye movement (REM) 2 stages. In general, technicians search the entire signal to find these patterns. Therefore, proper detection of these patterns is sufficient information for polysomnographic technicians.</p>
          <p>
            <bold>Explanation type 2: ratio of signals</bold>
          </p>
          <p>Technician A claimed that estimating the ratio of <italic>delta waves</italic> in an epoch is the most critical part in identifying the non-REM 3 stages since the scoring manuals recommend annotating the epoch as stage non-REM 3 when delta waves account for more than 20% of the signals [<xref ref-type="bibr" rid="ref1">1</xref>]. The participant mentioned that technicians usually count the number of delta waves manually to correctly identify the non-REM 3 stages in sleep recordings.</p>
          <p>
            <bold>Explanation type 3: changes in signals</bold>
          </p>
          <p>Alpha waves are prevalently observed during the wake and non-REM 1 stages. However, the participant mentioned that changes in the amplitudes of <italic>alpha waves</italic> are important criteria for distinguishing non-REM 1 stages from the wake stages. According to the manual [<xref ref-type="bibr" rid="ref1">1</xref>], the alpha waves in the non-REM 1 stages normally exhibit smaller amplitudes compared with the wake stages. Technician A mentioned that perceiving the overall changes in alpha waves is the primary task in detecting boundaries between the wake and non-REM 1 stages.</p>
        </boxed-text>
      </sec>
      <sec>
        <title>Iterative Design Process: How Explanations Can Be Presented</title>
        <sec>
          <title>Refinements of Model Components</title>
          <p>In the first iteration session, convolutional filters obtained from TRIER [<xref ref-type="bibr" rid="ref28">28</xref>] were shown to technician A. The participant expressed the concern that although the convolutional filters contained morphologically significant shapes, undesirable features (high-frequency noises or low-frequency fluctuations) were also intermingled in the filter. The participant requested a refinement of the convolutional filters to improve the quality of the features. For example, in formulating filters that correspond to slow waves, the participant wanted to remove high-frequency components because delta waves have frequency components &#60;4 Hz. The filter refinement process is illustrated in <xref rid="figure2" ref-type="fig">Figure 2</xref>. Consequently, the convolutional filters contain features that correspond to the following EEG patterns: alpha waves, theta waves, delta waves, sawtooth waves, vertex sharp waves, sleep spindles, and k-complexes. After refinement, the filters are depicted in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p>
          <fig id="figure2" position="float">
            <label>Figure 2</label>
            <caption>
              <p>The filter refinement process is as follows: (1) delta waves were low-pass filtered, (2) regions outside the sleep spindle were zeroed-out, and (3) only the regions corresponding to k-complex features were selected and low-pass filtered afterward.</p>
            </caption>
            <graphic xlink:href="jmir_v24i1e28659_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
        </sec>
        <sec>
          <title>Selecting Information Source for Making Explanations</title>
          <p>Owing to the previous refinement process, components in the convolutional filter are clinically meaningful, and the corresponding features in the neural networks can be interpreted accordingly. For example, for a filter that was designated for k-complex–related features, the activation values generated from the filter were used to locate k-complexes in the data. Similarly, filters analogous to alpha waves can generate information related to alpha wave changes in the data.</p>
          <p>Therefore, we selected convolutional filters and activation values as basic elements to generate explanations of the model predictions. In addition to the 2 components, a saliency map [<xref ref-type="bibr" rid="ref29">29</xref>], or the gradient values of the input points, was also adopted to mark significant regions in making a prediction. This information indicates which regions in the data were important from the AI perspective. The neural network components used for generating explanations are summarized in <xref ref-type="boxed-text" rid="box4">Textbox 4</xref>.</p>
          <boxed-text id="box4" position="float">
            <title>List of information sources for generating information for the clinical decision support systems.</title>
            <p>
              <bold>Component 1: convolutional filters and their activation values</bold>
            </p>
            <p>Convolutional filters represent the clinical electroencephalogram patterns on which the model is based. Information regarding each clinical feature can be obtained from the activation values acquired from the filters.</p>
            <p>
              <bold>Component 2: saliency values calculated from neural networks</bold>
            </p>
            <p>Important regions, which significantly contributed to model predictions, can be inspected from the saliency values. Users can view the data from the perspective of the artificial intelligence model with saliency values.</p>
          </boxed-text>
        </sec>
        <sec>
          <title>Visualization Strategies</title>
          <p>Visualization strategies for each clinical feature were devised to provide information in an easily adopted form for sleep staging. Initially, plots of activation vectors without any processing were provided to the participating technician. In this case, the technician failed to use any of the information in the activation values. They emphasized that information should be compatible with the scoring procedure of the technician: “I cannot make use of the information. I want information to be provided in a form that can easily fit with my procedure.” This argument is closely linked to the critical issues in designing AI assistant tools: information from the system should be easily integrated into tasks of users [<xref ref-type="bibr" rid="ref47">47</xref>,<xref ref-type="bibr" rid="ref48">48</xref>].</p>
          <p>From this standpoint, we constructed different visualization strategies for each explanation type because conventions observed during the user observation study constituted the representative logical procedures for processing information in EEG recordings (<xref ref-type="boxed-text" rid="box5">Textbox 5</xref>).</p>
          <p><xref rid="figure3" ref-type="fig">Figure 3</xref> shows an in-tool visualization of the strategies. Through visualization, explanations from AI can be conveyed to users with their proper clinical contexts. Technician A attested that such explanations with enhanced readability could be easily adopted in the sleep staging process.</p>
          <boxed-text id="box5" position="float">
            <title>Four visualization strategies developed in this study. The first three strategies correspond to the interpreting conventions observed during the user observation study.</title>
            <p>
              <bold>Strategy 1: detection boxes</bold>
            </p>
            <p>Technician A claimed that the patterns, the presence of which alone indicates a sleep stage, should be more easily identified from the recordings. After that, it would be sufficient for the technicians to check whether the artificial intelligence (AI) model correctly located these patterns in the electroencephalogram (EEG) recordings. Therefore, we outlined detection boxes in regions that were detected to include the desired EEG patterns. Detection algorithms were implemented based on the amplitudes of the activation values calculated from the convolutional filters with the desired pattern.</p>
            <p>
              <bold>Strategy 2: delta wave blocks</bold>
            </p>
            <p>As polysomnographic technicians rely on the number of delta waves in the recordings, it is important to make the distribution of delta waves more visually intuitive. For these cases, technician A wanted to perceive each peak in delta waves as a single entity. We digitized the activation values from the convolutional filters of delta waves such that regions with activation values higher than a set threshold were encoded as 1 or otherwise as 0. Visualizing the encoded digits from the activation vectors, technician A perceived the information as blocks of slow waves and counted the number of blocks in the figure.</p>
            <p>
              <bold>Strategy 3: alpha activation surfaces</bold>
            </p>
            <p>For detecting changes of alpha waves on the boundary of the wake and non-REM 1 stages, the activation values generated directly from the convolution between the alpha wave filters and the input recordings were used. In this case, the participant requested fluctuations to be easily perceivable in the interface. During the iterations, technician A acknowledged that overall fluctuations of the activation values matched well with the perception of the changes. In such a setting, it was felt that the activation values amplify the changes in amplitudes. The technician asserted that these values are perceived as a surface area, thus making it more intuitive to sense overall changes in the signal.</p>
            <p>
              <bold>Strategy 4: saliency highlights</bold>
            </p>
            <p>The participant claimed that saliency values could be helpful for technicians as they could view the recordings from the AI perspective. In particular, the technician wanted to identify the EEG regions with high saliency values. Therefore, we highlighted the EEG recording segments with high saliency values.</p>
          </boxed-text>
          <fig id="figure3" position="float">
            <label>Figure 3</label>
            <caption>
              <p>Visualization strategies for each interpretation pattern. Information in electroencephalogram (EEG) recordings is visualized differently for each interpretation convention introduced in Textbox 5.</p>
            </caption>
            <graphic xlink:href="jmir_v24i1e28659_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
        </sec>
        <sec>
          <title>Constructing the System Outline</title>
          <p>In the original version of the system, we empowered users to explore EEG recordings interactively with a filter selection box with which users could choose the desired EEG patterns and analyze signals based on the selected features. However, technician A observed that the system with an exploratory filter selection process might degrade its usability, as it disrupts the workflows of the technician:</p>
          <disp-quote>
            <p>Usually scoring of an epoch takes place in a short time, typically between five to ten seconds and even down to one second for easy cases. The selection process can be a bottleneck during scoring, thus other technicians are more likely to skip filter selection and score EEG epochs on their own.</p>
          </disp-quote>
          <p>This indicates that for clinical tasks where large numbers of data points are annotated in a relatively short time, the accessibility of desired features could be more important than interactivity. Therefore, instead of interacting with multiple features, we implemented an information system to be directly accessible.</p>
          <p>Specifically, rather than providing multiple sets of available information, we chose to show only the information corresponding to the predicted sleep stage for the epoch (<xref rid="figure4" ref-type="fig">Figure 4</xref>). For example, only the detection boxes of sleep spindles and k-complexes were provided for the epochs that were predicted as N2 stages. In this version, technician A acknowledged that the usability of information is enhanced compared with previous versions where multiple sets of information are provided, which results in too much information on a single screen and poor readability. Furthermore, the visualizations could explain the model predictions because the model provides only information relevant to its predictions. In <xref ref-type="table" rid="table3">Table 3</xref>, we list specific information provided for each stage.</p>
          <p>Similar to other tools for assisting sleep staging [<xref ref-type="bibr" rid="ref49">49</xref>], our system provides basic information from EEG recordings (<xref rid="figure5" ref-type="fig">Figure 5</xref>). It displays the hypnogram, a graph that visualizes changes in sleep stages over time, on top of its interface. Hypnograms for annotated stages from users as well as predictions from AI are provided so that users can monitor their editing process. A table that contains time information and annotated sleep stages is located on the right panel of the interface. The EEG and EOG recordings of an epoch are depicted in the main interface. In addition to the basic components, our CDSS provides the following information: AI-generated predictions and explanations from the AI model around the target EEG channel. Video recording provided in <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref> demonstrates the overview of the CDSS and how users interact with it.</p>
          <fig id="figure4" position="float">
            <label>Figure 4</label>
            <caption>
              <p>Visualization strategies for the system. In electroencephalogram (EEG) recordings predicted as N2, k-complexes and sleep spindles are detected and visualized as red and blue boxes. In EEG recordings predicted as N3, detected delta waves are visualized as green blocks. Regions with high saliency values are highlighted in pink on the EEG recordings. Strategy is abbreviated as S.</p>
            </caption>
            <graphic xlink:href="jmir_v24i1e28659_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
          <table-wrap position="float" id="table3">
            <label>Table 3</label>
            <caption>
              <p>Information provided for each sleep stage.</p>
            </caption>
            <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
              <col width="200"/>
              <col width="200"/>
              <col width="200"/>
              <col width="200"/>
              <col width="200"/>
              <thead>
                <tr valign="top">
                  <td>Stage</td>
                  <td>Detection boxes</td>
                  <td>Delta wave blocks</td>
                  <td>Alpha activation surfaces</td>
                  <td>Saliency highlights</td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td>Wake</td>
                  <td/>
                  <td/>
                  <td>✓</td>
                  <td>✓</td>
                </tr>
                <tr valign="top">
                  <td>N1<sup>a</sup></td>
                  <td/>
                  <td/>
                  <td>✓</td>
                  <td>✓</td>
                </tr>
                <tr valign="top">
                  <td>N2<sup>a</sup></td>
                  <td>✓</td>
                  <td/>
                  <td>✓</td>
                  <td>✓</td>
                </tr>
                <tr valign="top">
                  <td>N3<sup>a</sup></td>
                  <td/>
                  <td>✓</td>
                  <td/>
                  <td>✓</td>
                </tr>
                <tr valign="top">
                  <td>REM<sup>b</sup></td>
                  <td>✓</td>
                  <td/>
                  <td>✓</td>
                  <td>✓</td>
                </tr>
              </tbody>
            </table>
            <table-wrap-foot>
              <fn id="table3fn1">
                <p><sup>a</sup>N1-3: non–rapid eye movement stages 1-3.</p>
              </fn>
              <fn id="table3fn2">
                <p><sup>b</sup>REM: rapid eye movement.</p>
              </fn>
            </table-wrap-foot>
          </table-wrap>
          <fig id="figure5" position="float">
            <label>Figure 5</label>
            <caption>
              <p>The following is the overall interface of the system: (1) hypnogram; (2) scoring table lists the time sequence of model predictions and user annotations; (3) physiological recordings of the data set are visualized in the main panel; (4) predictions; and (5) explanations from artificial intelligence (AI) are in the middle of the interface. EEG: electroencephalogram; EOG: electrooculogram.</p>
            </caption>
            <graphic xlink:href="jmir_v24i1e28659_fig5.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
        </sec>
      </sec>
      <sec>
        <title>Quantitative Evaluation</title>
        <sec>
          <title>Accuracy</title>
          <p><xref rid="figure6" ref-type="fig">Figure 6</xref> illustrates macro-F1 scores. Each point in the scatter plots corresponds to the performance pair measured using the comparison method (AI only, <italic>μ<sub>1</sub></italic>) and our method (AI+explainer, <italic>μ<sub>2</sub></italic>) on the same test set.</p>
          <p>For the overall data set, which consisted of 15-minute EEG segments and single-epoch test set, there were no significant differences between baseline AI and our CDSS for results from all participants (<italic>µ</italic><sub>1</sub>=60.22; <italic>µ</italic><sub>2</sub>=61.31; <italic>P</italic>=.09; n=26; <italic>z</italic>=1.63; number of clusters=9). However, a performance improvement can be observed when we restricted this data set to participants with &#60;5 years of work experience (<italic>µ</italic><sub>1</sub>=56.75; <italic>µ</italic><sub>2</sub>=60.59; <italic>P</italic>=.05; n=26; <italic>z</italic>=1.63; number of clusters=4). For a single-epoch test set, in which the utility of the methods could be more accurately determined, we also observed improvements in accuracy (<italic>µ</italic><sub>1</sub>=46.55; <italic>µ</italic><sub>2</sub>=50.28; <italic>P</italic>=.03; n=18; <italic>z</italic>=1.94; number of clusters=9).</p>
          <p>For the overall data set, compared with the conventional staging setting where predictions from the AI were not provided (<italic>µ<sub>1</sub></italic>), the macro-F1 scores were significantly improved when the technicians adopted our method (<italic>µ</italic><sub>1</sub>=43.23; <italic>µ</italic><sub>2</sub>=68.04; <italic>P</italic>=.004; n=17; <italic>z</italic>=2.64; number of clusters=9). Similarly, the macro-F1 scores improved for novice technicians when we compared our CDSS with a conventional sleep staging setting (<italic>µ</italic><sub>1</sub>=39.52; <italic>µ</italic><sub>2</sub>=70.58; <italic>P</italic>=.05; n=6; <italic>z</italic>=1.67; number of clusters=3).</p>
          <p>It should be noted that these results cannot be directly compared with sleep staging performance in other studies where performance was evaluated for whole-night sleep staging results. In our setting, the performance was measured from short segments of the EEG recordings. Here, sleep staging performances could be reported to be lower than the whole night sleep staging results in previous works, as the macro-F1 scores of the sleep staging results could be significantly affected by a few incorrect predictions.</p>
          <fig id="figure6" position="float">
            <label>Figure 6</label>
            <caption>
              <p>The improvements of the macro-F1 scores in various settings. The results measured as follows from (1) all participants and all test sets; (2) participants who have &#60;5 years of work experience and all test sets; and (3) all participants and single-epoch test sets are provided. AI: artificial intelligence.</p>
            </caption>
            <graphic xlink:href="jmir_v24i1e28659_fig6.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
        </sec>
        <sec>
          <title>Correction Rates for Incorrect Predictions From the AI</title>
          <p>For the erroneous predictions generated by the AI, statistics regarding the ratio of correctly revised epochs did not show significant differences between the baseline AI and our method. Among the 392 EEG epochs in the test data, 30.8% (121/392) were incorrectly predicted epochs from our network. Of the 30.8% (121/392) of the epochs, technicians detected 28.5% (112/392) of the incorrect predictions made with our CDSS, whereas 28.5% (112/392) were detected in the baseline AI. There were no significant differences in the detection rates of incorrectly predicted epochs (<italic>P</italic>=.39; n=9; <italic>z</italic>=0.28; <italic>r</italic>=0.11). Furthermore, among these incorrectly predicted epochs from AI detected by technicians, there were no significant differences in the ratio of correct revisions where technicians identified the correct stages for incorrect predictions (<italic>µ</italic><sub>1</sub>=15.68%; <italic>µ</italic><sub>2</sub>=16.42%; <italic>P</italic>=.76; n=9; <italic>z</italic>=−0.70; <italic>r</italic>=0.28). Similarly, for technicians with &#60;5 years of experience, we did not observe improvements in the detection rates of incorrectly predicted epochs (<italic>µ</italic><sub>1</sub>=27.19%; <italic>µ</italic><sub>2</sub>=30.52%; <italic>P</italic>=.86; n=9; <italic>z</italic>=−1.10; <italic>r</italic>=−0.60) and the ratio of correct revisions (<italic>µ</italic><sub>1</sub>=12.90%; <italic>µ</italic><sub>2</sub>=16.67%; <italic>P</italic>=.97; n=9; <italic>z</italic>=−1.83; <italic>r</italic>=−1.0).</p>
        </sec>
        <sec>
          <title>Interrater Reliability</title>
          <p>Scatter plots of the Cohen κ scores calculated for the baseline (κ<sub>1</sub>) and our method (κ<sub>2</sub>) are shown in <xref rid="figure7" ref-type="fig">Figure 7</xref>. As with the macro-F1 scores, improvements in reliability for all cases were observed (κ<sub>1</sub>=57.02; κ<sub>2</sub>=59.54; <italic>P</italic>=.07; n=212; <italic>z</italic>=1.49; <italic>r</italic>=0.17). However, more significant improvements were observed for the single-epoch test set (κ<sub>1</sub>=51.28; κ<sub>2</sub>=57.21; <italic>P</italic>=.002; n=64; <italic>z</italic>=2.80; <italic>r</italic>=0.57). According to the criteria for interpreting the Cohen κ score [<xref ref-type="bibr" rid="ref50">50</xref>], we obtained moderate agreement between technicians for both the proposed CDSS and baseline AI settings. Compared with usual sleep staging settings, where predictions from AI are not provided, interrater reliability also improved (κ<sub>1</sub>=35.06; κ<sub>2</sub>=77.48; <italic>P</italic>&#60;.001).</p>
          <fig id="figure7" position="float">
            <label>Figure 7</label>
            <caption>
              <p>The improvements of interrater reliability in various settings. The results measured from the following: (1) all participants and all test sets and (2) all participants and single-epoch test sets are provided. AI: artificial intelligence.</p>
            </caption>
            <graphic xlink:href="jmir_v24i1e28659_fig7.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
        </sec>
      </sec>
      <sec>
        <title>Qualitative Evaluation</title>
        <p>In this section, a qualitative evaluation of the tool is described. The adoption strategies developed by the participants and the perceived usability of the system are discussed.</p>
        <sec>
          <title>Helpful Aspects</title>
          <p>In total, 78% (7/9) of the participants responded that our system helped to review AI predictions. They reported that they referred to information from the CDSS when inspecting AI predictions. Several aspects of the utility of the tools were confirmed.</p>
        </sec>
        <sec>
          <title>Reducing the Workload Required for Pattern Recognition</title>
          <p>One of the most important utilities mentioned during the interviews was that our tool reduced the workload required to inspect EEG epochs. Analyzing EEG epochs is similar to visual searching tasks, where technicians must identify specific patterns in a visual environment [<xref ref-type="bibr" rid="ref51">51</xref>]. Participants attested that information visualized by saliency highlights and detection boxes drew their attention to important regions [<xref ref-type="bibr" rid="ref52">52</xref>]. Helped by the information provided by the detection boxes, participants were easily able to identify important regions for examination. On the basis of this information, they assessed whether the patterns were correctly detected by the algorithm. Similarly, for delta waves, participants replied that they only needed to count the number of delta wave blocks, and they did not need to check delta waves one by one from the EEG recordings.</p>
        </sec>
        <sec>
          <title>Providing Quantitative Visual Reference</title>
          <p>Interviewees stated that sleep staging tasks heavily depend on the subjective criteria of each technician. Perceiving the attenuation in alpha waves on the boundary of the wake and N1 stages is one of the most representative cases in which sleep staging is affected by subjective perception. In total, 55% (5/9) of the participants used the information from the alpha activation surfaces as a reference when they were not confident whether changes in the alpha wave were significant. Even the participant who was not satisfied with the system answered that this information was helpful for similar reasons.</p>
        </sec>
        <sec>
          <title>Unhelpful Aspects</title>
          <p>Two senior participants answered that they did not find the system helpful. They claimed that the specificity of the information from the system was below the desired level:</p>
          <disp-quote>
            <p>I am quite strict in detecting sleep EEG patterns like delta waves. However, from my point of view, too many regions were annotated as significant points. Thus, for many cases, I did not refer to the provided information.</p>
          </disp-quote>
          <p>This point emphasizes that for clinical tasks where decision-making may differ between individuals, personal differences among users should be considered to improve the usability of a tool. In our domain, for example, user interfaces that control the sensitivity and specificity of the pattern detection algorithm can be provided.</p>
          <p>Another technician did not refer to the system during the experiments because it was inconvenient to consider information other than the EEG recordings:</p>
          <disp-quote>
            <p>Due to time constraints, I am used to scoring stages speedily compared to other technicians. Thus, in some sense, I tend to rush during sleep staging sessions, and would rather not care about information in the system.</p>
          </disp-quote>
          <p>Interviews from the participants reveal that the tight time constraints in clinical environments are another challenge to be considered when designing a clinical support tool because changing the workflow of medical staff is a complicated task, which requires not only reliable performance but also usability in the workflows [<xref ref-type="bibr" rid="ref46">46</xref>].</p>
        </sec>
        <sec>
          <title>Explanations and Trust in the System</title>
          <p>In this section, we summarize how the explanations of our systems affect user trust during the experiments.</p>
        </sec>
        <sec>
          <title>Explanations in Agreed Epochs</title>
          <p>For epochs in which the predictions of the participants agreed with those of the AI, the technicians expressed trust in the predictions. In this case, the participants expressed that, as annotated regions from the system matched the important regions determined by the users, they were confidently able to continue to the next stages.</p>
        </sec>
        <sec>
          <title>Explanations in Epochs With Disagreement</title>
          <p>For the epochs where the predictions differed between the AI and users and were consequently modified by the technicians, the participants felt that the explanations clarified why AI predicted the epochs differently. In these cases, one technician argued:</p>
          <disp-quote>
            <p>Without explanations, I might jump to a conclusion that the accuracy of the AI is not at a desirable level. However, after being exposed to the explanations, there were some convincing factors in the AI-generated predictions, and I tried to re-investigate the recordings based on the AI explanations to find out whether my reasoning on predictions was strong enough to modify the AI prediction.</p>
          </disp-quote>
          <p>Even the technician, who did not think that the tool was helpful, reported:</p>
          <disp-quote>
            <p>At first, I totally disagreed with the predictions from the AI. Throughout the experiments, however, I found out that AI algorithms were reasonable on some level.</p>
          </disp-quote>
          <p>In summary, even though user trust could be severely affected when the AI predictions were inconsistent with those of the users, the explanations provided in our CDSS improved the trustworthiness of the system. In particular, the explanations helped users find reasonable aspects of AI predictions.</p>
        </sec>
        <sec>
          <title>Notable Adoption Strategies</title>
          <p>We obtained various sets of answers such as “I first focused on saliency highlights and then inspected signals based on the detection boxes” or “I used the alpha activation surfaces in detecting sleep arousal.” Among these answers, some notable strategies were identified. We discuss these strategies and their implications for human–AI collaboration in clinical domains.</p>
        </sec>
        <sec>
          <title>Rediscovery of Unnoticed Features</title>
          <p>Classifying REM stages solely from EEG recordings is deemed an impossible task, and sleep technicians prefer to rely on chin EMG and EOG recordings for REM stages [<xref ref-type="bibr" rid="ref1">1</xref>]. Thus, most participants had difficulty evaluating sleep epochs that were predicted as REM. However, several participants found that they could distinguish REM from the N1 stages with our method:</p>
          <disp-quote>
            <p>In general, I used to disregard sawtooth waves because REM has more distinct landmarks in EOG. However, the AI model correctly captured the sawtooth waves (patterns that occur in REM stages) and convinced me that the given epochs are from REM. Without such information, I might incorrectly score the stages.</p>
          </disp-quote>
          <p>These use cases demonstrate that our tool successfully conveyed important but easily dismissed features of the data. We believe that the above insight illustrates an important aspect of human–AI collaboration because alternative but significant viewpoints from the AI system successfully convinced the users during decision-making, which resulted in a performance enhancement.</p>
        </sec>
        <sec>
          <title>Attention Allocation</title>
          <p>In the adoption of a clinical AI system, to allocate their attention efficiently to weak portions of the algorithms, it is important for users to properly understand the strengths and weaknesses of AI. This scenario is termed attention allocation [<xref ref-type="bibr" rid="ref18">18</xref>]. During the experiments, several technicians developed strategies related to attention allocation. One participant found that AI is vulnerable to misidentifying sweat artifacts as delta waves. This participant strategically allocated more attention to annotated regions in epochs that were predicted as N3 stages and inspected whether the annotated regions corresponded to delta waves or sweat artifacts. With this strategy, this participant effectively distinguished the N3 stages from epochs contaminated by sweat artifacts.</p>
          <p>In this adoption pattern, participants constructed strategies to successfully collaborate with AI [<xref ref-type="bibr" rid="ref48">48</xref>]. Specifically, users evaluated the convincing and unconvincing contributions of AI, thus efficiently allocating their attention during the adoption.</p>
        </sec>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>To our knowledge, this work is the first to construct an interpretable AI system using deep learning with a user-centered approach to develop a CDSS for sleep staging. Recent studies continuously demonstrate that deep learning algorithms can achieve comparable performance compared with human experts [<xref ref-type="bibr" rid="ref7">7</xref>-<xref ref-type="bibr" rid="ref9">9</xref>]. However, previous studies have found that human practitioners require information beyond the delivery of accurate predictions [<xref ref-type="bibr" rid="ref18">18</xref>]. To achieve this, we focused on constructing a CDSS that provides information compatible with the diagnostic patterns of human raters and helps technicians easily integrate the CDSS into their sleep staging procedures. Through user observation and an iterative design process, we obtained the desired characteristics for the explanations provided in the CDSS for sleep staging. First, clinical practitioners wanted explanations to help them validate AI predictions. Here, technicians wanted explanations that adhered to their clinical knowledge. Second, we categorized the type of information based on our observations of how technicians interpret the characteristics of each EEG. Finally, during the iterative design process, we confirmed that information contained in neural network components can be used to generate explanations for sleep staging results. The design components were updated iteratively based on the feedback of the technician.</p>
        <p>When evaluating the improvements in the sleep staging performance of all participants, we did not observe significant improvements when the <italic>P</italic> value was approximately .17. However, we believe that our quantitative evaluation contains meaningful results. First, when assessing the improvements for novice participants, we observed that the macro-F1 scores improved by 6.7% with a <italic>P</italic> value of .02. Considering that novice technicians may rely more on supportive information than expert technicians, this result implies that our tool could be effectively used to augment the sleep scoring capacities of novice technicians with acceptable sleep-relevant explanations. Second, when assessing the improvements in a single-epoch sleep scoring setting, which is similar to a stress-test configuration, we observed significant improvements in the macro-F1 scores and interrater reliability. Notable results in this stress test setting could indicate that our explanations to an extent helped technicians interpret the signal characteristics of each EEG epoch. Third, the results of the qualitative evaluation implied that the CDSS supports sleep staging by reducing the workload required for pattern recognition and providing quantitative visual references. These findings show that the developed system successfully and appropriately complemented the assessments of the technician by suggesting the desired information. Our tool obtained such utility for two reasons: (1) clinically sound features were correctly addressed and (2) information visualization was designed to be acceptable in conventional workflows of the sleep staging process.</p>
        <p>We identified further issues that should be considered when designing a CDSS. During the experiments, 20% (2/10) of the technicians indicated that our system was not adoptable for workflow in sleep staging. In particular, 10% (1/10) of the technicians expressed a lack of trust in the AI system. In general, the avoidance of algorithmic results is an important challenge to be addressed when adopting an automatic system [<xref ref-type="bibr" rid="ref53">53</xref>]. However, these challenges can be interpreted based on skill levels of the technician. For example, based on the Rasmussen skill-, rule-, and knowledge-based behavior model [<xref ref-type="bibr" rid="ref24">24</xref>], senior technicians may score sleep stages without consciously processing EEG information. Therefore, additional explanations from the CDSS can distract such technicians. In contrast, novice technicians may require additional cognitive processing of information in the recordings. Therefore, explanations from the CDSS could be helpful as guiding information during processing and lead to significant enhancements in their performance when a CDSS is adopted.</p>
          <p>In addition, overreliance on computer systems is another challenge to be considered when adopting decision support tools [<xref ref-type="bibr" rid="ref11">11</xref>,<xref ref-type="bibr" rid="ref54">54</xref>]. When adopting AI systems, there are cases where users tend to accept predictions from systems without any personal judgment on whether the information is correct. In our evaluation, the correction rates for erroneous predictions did not improve. This means that even though explanations from our system successfully operated as convincing components for model predictions, they failed to reveal ambiguous predictions. These results have implications for further development (eg, explanations for uncertainties in predictions can be provided by the model to inform users about ambiguous components in the data [<xref ref-type="bibr" rid="ref15">15</xref>]). The confidence of the predictions can be algorithmically estimated by the models as additional information [<xref ref-type="bibr" rid="ref55">55</xref>]. Such features can be integrated into a single framework to enhance safety in human–AI interaction systems.</p>
      </sec>
      <sec>
        <title>Comparison With Previous Work</title>
        <p>Previous studies on sleep staging have confirmed that suggestions for proper computational features can enhance sleep staging performance. An experimental study demonstrated that interrater reliability among technicians can be significantly improved by computer-derived suggestions [<xref ref-type="bibr" rid="ref19">19</xref>]. Taking inspiration from that study, our work proposes an approach to provide clinically meaningful information from deep learning models. Our results are consistent with those of a previous study, as the interrater reliability in our system improved significantly. However, our study differs from previous works in several respects. Although previous tools for sleep staging have already provided sleep-relevant information to users [<xref ref-type="bibr" rid="ref56">56</xref>,<xref ref-type="bibr" rid="ref57">57</xref>], these algorithms require a large amount of parameter tuning to fit each data set [<xref ref-type="bibr" rid="ref58">58</xref>]. In this sense, these works used a manually curated algorithm rather than augmenting the AI system to provide information. Furthermore, our work addresses the utility and readability of the system during the development of the tool, whereas previous studies preferentially focused on the calculation of sleep-relevant features in EEGs.</p>
        <p>In the domain of human–AI interaction, several deep learning models have been exploited as information sources to assist medical staff with appropriate knowledge. In these works, the usability of clinical AI was mainly addressed from the perspective of human users [<xref ref-type="bibr" rid="ref18">18</xref>]. A previous study surveyed how and what information should be provided for the analysis of radiographic images [<xref ref-type="bibr" rid="ref59">59</xref>]. This work stressed that information systems should be designed based on the user needs of clinical practitioners. Another study introduced a novel medical image retrieval system that leverages embedding vectors in a neural network to retrieve similar medical images [<xref ref-type="bibr" rid="ref47">47</xref>]. These bodies of work demonstrated that model interpretations should be formulated in the context of clinical knowledge, as users require medical explanations during adoption. Similarly, our work extensively investigates the desirable characteristics of sleep staging AI and proposes how these features can be provided in a CDSS.</p>
        <p>For sleep staging, an earlier work proposed an AI framework that prioritizes ambiguous epochs in EEG recordings with explanations in cases of uncertainty [<xref ref-type="bibr" rid="ref15">15</xref>]. However, this study proposes a conceptual framework rather than a practical implementation of the system. In this work, CDSS was simulated in a Wizard of Oz experiment, where human researchers manually generated the explanations in the system to address the ambiguous epochs in EEG recordings. In contrast, our work proposed a practical methodology for constructing meaningful information on sleep stages to assist clinical practitioners.</p>
      </sec>
      <sec>
        <title>Limitations and Future Directions</title>
        <p>Several limitations that require consideration remain in our study. First, we conducted user observations and iterative design sessions with only 1 technician. Although manuals for sleep staging support most of the feedback of the technician, specific requirements defined by different users are necessary for user-centered design research. Moreover, during the experiments, participants reviewed the EEG recordings provided from a public EEG data set. As EEG recordings are highly heterogeneous across data sets and recording environments, the utility of the system could be more accurately evaluated if the neural network model was trained on data sets recorded in real-world settings.</p>
        <p>Our work is further limited as we only considered EEG recordings for sleep scoring. Assuming real-world sleep scoring is performed with polysomnographic recordings, which include EEG, EOG, EMG, and ECG signals, not considering other recordings may have affected the scoring results. For example, eye movement patterns are crucial factors in identifying the REM stages. As we have only provided information for EEG recordings, we could not offer explanations regarding eye movements. However, we believe that our overall design approach can be applied similarly in future studies to explain the output of other physiological sensors, such as EOG and EMG. These future studies could construct a more comprehensible CDSS for sleep scoring. In addition, evaluation of the CDSS system with whole-night polysomnography will provide more generalizable performance results that can be connected to the results of real-world polysomnography.</p>
        <p>The overall sample size may not be sufficient for comparison, considering that there are high interrater disagreements on the sleep staging results depending on individual characteristics. Even though we observed some notable improvements with the small sample size, a further evaluation study with more technicians is desirable. Furthermore, the representativeness of participants should be mentioned. Technicians from secondary and tertiary hospitals participated in the evaluation study, and technicians in primary hospitals were not considered. Technicians in primary hospitals may exhibit different tendencies toward the adoption of automatic sleep scoring tools. Thus, our study did not address this population. However, considering that technicians in primary hospitals tend to have relatively short experience in polysomnography, we believe that these results from novice technicians can be generalized to polysomnographic technicians in primary care.</p>
        <p>An AI system that provides explanations for predictions was compared with conventional models that do not provide explanations. In this setting, there was a risk that the participants were aware that the experimental objective was to construct and evaluate the effectiveness of the explanations. However, considering that explainable AI systems for medical domains have not been widely developed, many previous CDSS studies conducted experiments in a similar manner to our work [<xref ref-type="bibr" rid="ref15">15</xref>]. Nevertheless, the omission of blinding conditions is a limitation of our experimental setting.</p>
        <p>Although our work qualitatively evaluates how users perceive the CDSS, future work is required to quantitatively assess the usability of the tool. For example, the NASA-Task Load Index [<xref ref-type="bibr" rid="ref60">60</xref>] could be used in a prospective study to compare the required workload for each sleep scoring tool. Other aspects, such as time spent scoring sleep stages, could be estimated in a more controlled experimental setting. We believe that future studies will provide more insights into the usability of CDSS.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>Our findings indicate that formulating clinical explanations for automated predictions using information from an AI system that incorporates a user-centered design process is an effective strategy for developing a CDSS for sleep staging. Given the extent to which performance improved and its high perceived usefulness in sleep staging, the proposed CDSS has great potential to be integrated into the real-world clinical workflow in a sleep laboratory.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>List of sleep-relevant electroencephalogram (EEG) patterns, refined convolutional filters, constructing data sets with EEG segments, and convolutional neural network components.</p>
        <media xlink:href="jmir_v24i1e28659_app1.docx" xlink:title="DOCX File , 108 KB"/>
      </supplementary-material>
      <supplementary-material id="app2">
        <label>Multimedia Appendix 2</label>
        <p>Demo video of the clinical decision support system.</p>
        <media xlink:href="jmir_v24i1e28659_app2.mp4" xlink:title="MP4 File  (MP4 Video), 29384 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">AI</term>
          <def>
            <p>artificial intelligence</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">CDSS</term>
          <def>
            <p>clinical decision support system</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">EEG</term>
          <def>
            <p>electroencephalogram</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">EMG</term>
          <def>
            <p>electromyogram</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">EOG</term>
          <def>
            <p>electrooculogram</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">REM</term>
          <def>
            <p>rapid eye movement</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">TRIER</term>
          <def>
            <p>The Template-Guided Neural Networks for Robust and Interpretable Sleep Stage Identification from EEG Recordings</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>This study was supported by Looxid Labs, Korea, and a grant of the Korea Health Technology R&#38;D Project through the Korea Health Industry Development Institute (KHIDI), funded by the Ministry of Health &#38; Welfare, Republic of Korea (grant HI21C0852). All the code and data sets used in this study are available on GitHub.</p>
    </ack>
    <fn-group>
      <fn fn-type="con">
        <p>All authors conceived the study, participated in the implementation of the tool, and wrote the manuscript. JH and TL conducted user interviews and user observation studies.</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Berry</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Brooks</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Gamaldo</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Harding</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Lloyd</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Marcus</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Vaughn</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>The AASM manual for the scoring of sleep and associated events: rules, terminology and technical specifications</article-title>
          <source>American Academy of Sleep Medicine, Darien, IL</source>
          <year>2015</year>
          <access-date>2021-12-29</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://aasm.org/resources/pdf/scoring-manual-preface.pdf">http://aasm.org/resources/pdf/scoring-manual-preface.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Subramanian</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Hesselbacher</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Mattewal</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Surani</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Gender and age influence the effects of slow-wave sleep on respiration in patients with obstructive sleep apnea</article-title>
          <source>Sleep Breath</source>
          <year>2013</year>
          <month>03</month>
          <day>16</day>
          <volume>17</volume>
          <issue>1</issue>
          <fpage>51</fpage>
          <lpage>6</lpage>
          <pub-id pub-id-type="doi">10.1007/s11325-011-0644-4</pub-id>
          <pub-id pub-id-type="medline">22252284</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Andlauer</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Moore</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Jouhier</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Drake</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Peppard</surname>
              <given-names>PE</given-names>
            </name>
            <name name-style="western">
              <surname>Han</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Hong</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Poli</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Plazzi</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>O'Hara</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Haffen</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Roth</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Young</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Mignot</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Nocturnal rapid eye movement sleep latency for identifying patients with narcolepsy/hypocretin deficiency</article-title>
          <source>JAMA Neurol</source>
          <year>2013</year>
          <month>07</month>
          <volume>70</volume>
          <issue>7</issue>
          <fpage>891</fpage>
          <lpage>902</lpage>
          <pub-id pub-id-type="doi">10.1001/jamaneurol.2013.1589</pub-id>
          <pub-id pub-id-type="medline">23649748</pub-id>
          <pub-id pub-id-type="pii">1684863</pub-id>
          <pub-id pub-id-type="pmcid">PMC4170796</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Supratak</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Dong</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Guo</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>DeepSleepNet: a model for automatic sleep stage scoring based on raw single-channel EEG</article-title>
          <source>IEEE Trans Neural Syst Rehabil Eng</source>
          <year>2017</year>
          <month>11</month>
          <volume>25</volume>
          <issue>11</issue>
          <fpage>1998</fpage>
          <lpage>2008</lpage>
          <pub-id pub-id-type="doi">10.1109/TNSRE.2017.2721116</pub-id>
          <pub-id pub-id-type="medline">28678710</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Perslev</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Jensen</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Darkner</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Jennum</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Igel</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>U-Time: a fully convolutional network for time series segmentation applied to sleep staging</article-title>
          <source>arXiv</source>
          <year>2019</year>
          <access-date>2021-12-29</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/pdf/1910.11162.pdf">https://arxiv.org/pdf/1910.11162.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Phan</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Andreotti</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Cooray</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>OY</given-names>
            </name>
            <name name-style="western">
              <surname>De Vos</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>SeqSleepNet: end-to-end hierarchical recurrent neural network for sequence-to-sequence automatic sleep staging</article-title>
          <source>IEEE Trans Neural Syst Rehabil Eng</source>
          <year>2019</year>
          <month>03</month>
          <volume>27</volume>
          <issue>3</issue>
          <fpage>400</fpage>
          <lpage>10</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/30716040"/>
          </comment>
          <pub-id pub-id-type="doi">10.1109/TNSRE.2019.2896659</pub-id>
          <pub-id pub-id-type="medline">30716040</pub-id>
          <pub-id pub-id-type="pmcid">PMC6481557</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sokolovsky</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Guerrero</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Paisarnsrisomsuk</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Ruiz</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Alvarez</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Human expert-level automated sleep stage prediction and feature discovery by deep convolutional neural networks</article-title>
          <source>Proceedings of the 17th International Workshop on Data Mining in Bioinformatics (BIOKDD2018), in Conjunction with the ACM SIGKDD Conference on Knowledge Discovery and Data Mining KDD2018</source>
          <year>2018</year>
          <conf-name>17th International Workshop on Data Mining in Bioinformatics (BIOKDD2018), in conjunction with the ACM SIGKDD Conference on Knowledge Discovery and Data Mining KDD2018</conf-name>
          <conf-date>Aug 20, 2018</conf-date>
          <conf-loc>London, UK</conf-loc>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://tinyurl.com/4ptkavfv"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tschandl</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Rosendahl</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Akay</surname>
              <given-names>BN</given-names>
            </name>
            <name name-style="western">
              <surname>Argenziano</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Blum</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Braun</surname>
              <given-names>RP</given-names>
            </name>
            <name name-style="western">
              <surname>Cabo</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Gourhant</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Kreusch</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Lallas</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Lapins</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Marghoob</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Menzies</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Neuber</surname>
              <given-names>NM</given-names>
            </name>
            <name name-style="western">
              <surname>Paoli</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Rabinovitz</surname>
              <given-names>HS</given-names>
            </name>
            <name name-style="western">
              <surname>Rinner</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Scope</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Soyer</surname>
              <given-names>HP</given-names>
            </name>
            <name name-style="western">
              <surname>Sinz</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Thomas</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Zalaudek</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Kittler</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Expert-level diagnosis of nonpigmented skin cancer by combined convolutional neural networks</article-title>
          <source>JAMA Dermatol</source>
          <year>2019</year>
          <month>01</month>
          <day>01</day>
          <volume>155</volume>
          <issue>1</issue>
          <fpage>58</fpage>
          <lpage>65</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/30484822"/>
          </comment>
          <pub-id pub-id-type="doi">10.1001/jamadermatol.2018.4378</pub-id>
          <pub-id pub-id-type="medline">30484822</pub-id>
          <pub-id pub-id-type="pii">2716294</pub-id>
          <pub-id pub-id-type="pmcid">PMC6439580</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hannun</surname>
              <given-names>AY</given-names>
            </name>
            <name name-style="western">
              <surname>Rajpurkar</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Haghpanahi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Tison</surname>
              <given-names>GH</given-names>
            </name>
            <name name-style="western">
              <surname>Bourn</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Turakhia</surname>
              <given-names>MP</given-names>
            </name>
            <name name-style="western">
              <surname>Ng</surname>
              <given-names>AY</given-names>
            </name>
          </person-group>
          <article-title>Cardiologist-level arrhythmia detection and classification in ambulatory electrocardiograms using a deep neural network</article-title>
          <source>Nat Med</source>
          <year>2019</year>
          <month>01</month>
          <volume>25</volume>
          <issue>1</issue>
          <fpage>65</fpage>
          <lpage>9</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/30617320"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41591-018-0268-3</pub-id>
          <pub-id pub-id-type="medline">30617320</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41591-018-0268-3</pub-id>
          <pub-id pub-id-type="pmcid">PMC6784839</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Shortliffe</surname>
              <given-names>EH</given-names>
            </name>
            <name name-style="western">
              <surname>Sepúlveda</surname>
              <given-names>MJ</given-names>
            </name>
          </person-group>
          <article-title>Clinical decision support in the era of artificial intelligence</article-title>
          <source>J Am Med Assoc</source>
          <year>2018</year>
          <month>12</month>
          <day>04</day>
          <volume>320</volume>
          <issue>21</issue>
          <fpage>2199</fpage>
          <lpage>200</lpage>
          <pub-id pub-id-type="doi">10.1001/jama.2018.17163</pub-id>
          <pub-id pub-id-type="medline">30398550</pub-id>
          <pub-id pub-id-type="pii">2713901</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Magrabi</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Ammenwerth</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>McNair</surname>
              <given-names>JB</given-names>
            </name>
            <name name-style="western">
              <surname>De Keizer</surname>
              <given-names>NF</given-names>
            </name>
            <name name-style="western">
              <surname>Hyppönen</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Nykänen</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Rigby</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Scott</surname>
              <given-names>PJ</given-names>
            </name>
            <name name-style="western">
              <surname>Vehko</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Wong</surname>
              <given-names>ZS</given-names>
            </name>
            <name name-style="western">
              <surname>Georgiou</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Artificial intelligence in clinical decision support: challenges for evaluating ai and practical implications</article-title>
          <source>Yearb Med Inform</source>
          <year>2019</year>
          <month>08</month>
          <volume>28</volume>
          <issue>1</issue>
          <fpage>128</fpage>
          <lpage>34</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.thieme-connect.com/DOI/DOI?10.1055/s-0039-1677903"/>
          </comment>
          <pub-id pub-id-type="doi">10.1055/s-0039-1677903</pub-id>
          <pub-id pub-id-type="medline">31022752</pub-id>
          <pub-id pub-id-type="pmcid">PMC6697499</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Smith</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Clinical AI: opacity, accountability, responsibility and liability</article-title>
          <source>AI Soc</source>
          <year>2020</year>
          <month>07</month>
          <day>25</day>
          <volume>36</volume>
          <issue>2</issue>
          <fpage>535</fpage>
          <lpage>45</lpage>
          <pub-id pub-id-type="doi">10.1007/s00146-020-01019-6</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Goldstein</surname>
              <given-names>CA</given-names>
            </name>
            <name name-style="western">
              <surname>Berry</surname>
              <given-names>RB</given-names>
            </name>
            <name name-style="western">
              <surname>Kent</surname>
              <given-names>DT</given-names>
            </name>
            <name name-style="western">
              <surname>Kristo</surname>
              <given-names>DA</given-names>
            </name>
            <name name-style="western">
              <surname>Seixas</surname>
              <given-names>AA</given-names>
            </name>
            <name name-style="western">
              <surname>Redline</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Westover</surname>
              <given-names>MB</given-names>
            </name>
          </person-group>
          <article-title>Artificial intelligence in sleep medicine: background and implications for clinicians</article-title>
          <source>J Clin Sleep Med</source>
          <year>2020</year>
          <month>04</month>
          <day>15</day>
          <volume>16</volume>
          <issue>4</issue>
          <fpage>609</fpage>
          <lpage>18</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/32065113"/>
          </comment>
          <pub-id pub-id-type="doi">10.5664/jcsm.8388</pub-id>
          <pub-id pub-id-type="medline">32065113</pub-id>
          <pub-id pub-id-type="pmcid">PMC7161463</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Goldstein</surname>
              <given-names>CA</given-names>
            </name>
            <name name-style="western">
              <surname>Berry</surname>
              <given-names>RB</given-names>
            </name>
            <name name-style="western">
              <surname>Kent</surname>
              <given-names>DT</given-names>
            </name>
            <name name-style="western">
              <surname>Kristo</surname>
              <given-names>DA</given-names>
            </name>
            <name name-style="western">
              <surname>Seixas</surname>
              <given-names>AA</given-names>
            </name>
            <name name-style="western">
              <surname>Redline</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Westover</surname>
              <given-names>MB</given-names>
            </name>
            <name name-style="western">
              <surname>Abbasi-Feinberg</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Aurora</surname>
              <given-names>RN</given-names>
            </name>
            <name name-style="western">
              <surname>Carden</surname>
              <given-names>KA</given-names>
            </name>
            <name name-style="western">
              <surname>Kirsch</surname>
              <given-names>DB</given-names>
            </name>
            <name name-style="western">
              <surname>Malhotra</surname>
              <given-names>RK</given-names>
            </name>
            <name name-style="western">
              <surname>Martin</surname>
              <given-names>JL</given-names>
            </name>
            <name name-style="western">
              <surname>Olson</surname>
              <given-names>EJ</given-names>
            </name>
            <name name-style="western">
              <surname>Ramar</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Rosen</surname>
              <given-names>CL</given-names>
            </name>
            <name name-style="western">
              <surname>Rowley</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Shelgikar</surname>
              <given-names>AV</given-names>
            </name>
          </person-group>
          <article-title>Artificial intelligence in sleep medicine: an American Academy of Sleep Medicine position statement</article-title>
          <source>J Clin Sleep Med</source>
          <year>2020</year>
          <month>04</month>
          <day>15</day>
          <volume>16</volume>
          <issue>4</issue>
          <fpage>605</fpage>
          <lpage>7</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/32022674"/>
          </comment>
          <pub-id pub-id-type="doi">10.5664/jcsm.8288</pub-id>
          <pub-id pub-id-type="medline">32022674</pub-id>
          <pub-id pub-id-type="pmcid">PMC7161449</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Schaekermann</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Beaton</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Sanoubari</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Lim</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Larson</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Law</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Ambiguity-aware AI assistants for medical data analysis</article-title>
          <source>Proceedings of the CHI Conference on Human Factors in Computing Systems</source>
          <year>2020</year>
          <month>04</month>
          <conf-name>CHI '20: CHI Conference on Human Factors in Computing Systems</conf-name>
          <conf-date>April 25 - 30, 2020</conf-date>
          <conf-loc>Honolulu HI USA</conf-loc>
          <fpage>1</fpage>
          <lpage>14</lpage>
          <pub-id pub-id-type="doi">10.1145/3313831.3376506</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Holzinger</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Biemann</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Pattichis</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Kell</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>What do we need to build explainable AI systems for the medical domain?</article-title>
          <source>arXiv</source>
          <year>2017</year>
          <access-date>2021-12-29</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/pdf/1712.09923.pdf">https://arxiv.org/pdf/1712.09923.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Zimmerman</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Steinfeld</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Carey</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Antaki</surname>
              <given-names>JF</given-names>
            </name>
          </person-group>
          <article-title>Investigating the heart pump implant decision process: opportunities for decision support tools to help</article-title>
          <source>ACM Trans Comput Hum Interact</source>
          <year>2016</year>
          <month>05</month>
          <volume>2016</volume>
          <fpage>4477</fpage>
          <lpage>88</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/27833397"/>
          </comment>
          <pub-id pub-id-type="doi">10.1145/2858036.2858373</pub-id>
          <pub-id pub-id-type="medline">27833397</pub-id>
          <pub-id pub-id-type="pmcid">PMC5101017</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cai</surname>
              <given-names>CJ</given-names>
            </name>
            <name name-style="western">
              <surname>Winter</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Steiner</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Wilcox</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Terry</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>"Hello AI": uncovering the onboarding needs of medical practitioners for human-AI collaborative decision-making</article-title>
          <source>Proc ACM Hum-Comput Interact</source>
          <year>2019</year>
          <month>11</month>
          <day>07</day>
          <volume>3</volume>
          <issue>CSCW</issue>
          <fpage>1</fpage>
          <lpage>24</lpage>
          <pub-id pub-id-type="doi">10.1145/3359206</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Younes</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Hanly</surname>
              <given-names>PJ</given-names>
            </name>
          </person-group>
          <article-title>Minimizing interrater variability in staging sleep by use of computer-derived features</article-title>
          <source>J Clin Sleep Med</source>
          <year>2016</year>
          <month>10</month>
          <day>15</day>
          <volume>12</volume>
          <issue>10</issue>
          <fpage>1347</fpage>
          <lpage>56</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.5664/jcsm.6186"/>
          </comment>
          <pub-id pub-id-type="doi">10.5664/jcsm.6186</pub-id>
          <pub-id pub-id-type="medline">27448418</pub-id>
          <pub-id pub-id-type="pii">jc-00169-16</pub-id>
          <pub-id pub-id-type="pmcid">PMC5033736</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ribera</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Lapedriza</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Can we do better explanations? A proposal of user-centered explainable AI</article-title>
          <source>IUI Workshops</source>
          <year>2019</year>
          <access-date>2021-12-29</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://explainablesystems.comp.nus.edu.sg/2019/wp-content/uploads/2019/02/IUI19WS-ExSS2019-12.pdf">https://explainablesystems.comp.nus.edu.sg/2019/wp-content/uploads/2019/02/IUI19WS-ExSS2019-12.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Barda</surname>
              <given-names>AJ</given-names>
            </name>
            <name name-style="western">
              <surname>Horvat</surname>
              <given-names>CM</given-names>
            </name>
            <name name-style="western">
              <surname>Hochheiser</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>A qualitative research framework for the design of user-centered displays of explanations for machine learning model predictions in healthcare</article-title>
          <source>BMC Med Inform Decis Mak</source>
          <year>2020</year>
          <month>10</month>
          <day>08</day>
          <volume>20</volume>
          <issue>1</issue>
          <fpage>257</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedinformdecismak.biomedcentral.com/articles/10.1186/s12911-020-01276-x"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12911-020-01276-x</pub-id>
          <pub-id pub-id-type="medline">33032582</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12911-020-01276-x</pub-id>
          <pub-id pub-id-type="pmcid">PMC7545557</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lim</surname>
              <given-names>BY</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Abdul</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Why these explanations? Selecting intelligibility types for explanation goals</article-title>
          <source>IUI Workshops</source>
          <year>2019</year>
          <access-date>2021-12-29</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://explainablesystems.comp.nus.edu.sg/2019/wp-content/uploads/2019/02/IUI19WS-ExSS2019-20.pdf">https://explainablesystems.comp.nus.edu.sg/2019/wp-content/uploads/2019/02/IUI19WS-ExSS2019-20.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Steinfeld</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Zimmerman</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Unremarkable AI: fitting intelligent decision support into critical, clinical decision-making processes</article-title>
          <source>Proceedings of the 2019 CHI Conference on Human Factors in Computing Systems</source>
          <year>2019</year>
          <conf-name>CHI '19: CHI Conference on Human Factors in Computing Systems</conf-name>
          <conf-date>May 4 - 9, 2019</conf-date>
          <conf-loc>Glasgow Scotland UK</conf-loc>
          <fpage>1</fpage>
          <lpage>11</lpage>
          <pub-id pub-id-type="doi">10.1145/3290605.3300468</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rasmussen</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Skills, rules, and knowledge; signals, signs, and symbols, and other distinctions in human performance models</article-title>
          <source>IEEE Trans Syst Man Cybern</source>
          <year>1983</year>
          <month>05</month>
          <volume>SMC-13</volume>
          <issue>3</issue>
          <fpage>257</fpage>
          <lpage>66</lpage>
          <pub-id pub-id-type="doi">10.1109/TSMC.1983.6313160</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ravanelli</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Bengio</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Speaker recognition from raw waveform with SincNet</article-title>
          <source>Proceedings of the IEEE Spoken Language Technology Workshop (SLT)</source>
          <year>2018</year>
          <conf-name>IEEE Spoken Language Technology Workshop (SLT)</conf-name>
          <conf-date>Dec. 18-21, 2018</conf-date>
          <conf-loc>Athens, Greece</conf-loc>
          <pub-id pub-id-type="doi">10.1109/slt.2018.8639585</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Multilevel wavelet decomposition network for interpretable time series analysis</article-title>
          <source>Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery &#38; Data Mining</source>
          <year>2018</year>
          <conf-name>KDD '18: The 24th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining</conf-name>
          <conf-date>August 19 - 23, 2018</conf-date>
          <conf-loc>London United Kingdom</conf-loc>
          <fpage>2437</fpage>
          <lpage>46</lpage>
          <pub-id pub-id-type="doi">10.1145/3219819.3220060</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Park</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Nam</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Sample-level deep convolutional neural networks for music auto-tagging using raw waveforms</article-title>
          <source>arXiv</source>
          <year>2017</year>
          <access-date>2021-12-29</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1703.01789">https://arxiv.org/abs/1703.01789</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Hwang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>TRIER: template-guided neural networks for robust and interpretable sleep stage identification from EEG recordings</article-title>
          <source>arXiv</source>
          <year>2020</year>
          <access-date>2021-12-29</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/pdf/2009.05407.pdf">https://arxiv.org/pdf/2009.05407.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Simonyan</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Vedaldi</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Zisserman</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Deep inside convolutional networks: visualising image classification models and saliency maps</article-title>
          <source>arXiv</source>
          <year>2013</year>
          <access-date>2021-12-29</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/pdf/1312.6034.pdf">https://arxiv.org/pdf/1312.6034.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pons</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Serra</surname>
              <given-names>X</given-names>
            </name>
          </person-group>
          <article-title>Randomly weighted CNNs for (music) audio classification</article-title>
          <source>Proceedings of the ICASSP 2019 - 2019 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)</source>
          <year>2019</year>
          <conf-name>ICASSP 2019 - 2019 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)</conf-name>
          <conf-date>May 12-17, 2019</conf-date>
          <conf-loc>Brighton, UK</conf-loc>
          <pub-id pub-id-type="doi">10.1109/icassp.2019.8682912</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yosinski</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Clune</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Nguyen</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Fuchs</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Lipson</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Understanding neural networks through deep visualization</article-title>
          <source>arXiv</source>
          <year>2015</year>
          <access-date>2021-12-29</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1506.06579">https://arxiv.org/abs/1506.06579</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Albawi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Mohammed</surname>
              <given-names>TA</given-names>
            </name>
            <name name-style="western">
              <surname>Al-Zawi</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Understanding of a convolutional neural network</article-title>
          <source>Proceedings of the International Conference on Engineering and Technology (ICET)</source>
          <year>2017</year>
          <conf-name>International Conference on Engineering and Technology (ICET)</conf-name>
          <conf-date>Aug. 21-23, 2017</conf-date>
          <conf-loc>Antalya, Turkey</conf-loc>
          <pub-id pub-id-type="doi">10.1109/icengtechnol.2017.8308186</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Khalighi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Sousa</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Santos</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Nunes</surname>
              <given-names>U</given-names>
            </name>
          </person-group>
          <article-title>ISRUC-Sleep: a comprehensive public dataset for sleep researchers</article-title>
          <source>Comput Methods Programs Biomed</source>
          <year>2016</year>
          <month>02</month>
          <volume>124</volume>
          <fpage>180</fpage>
          <lpage>92</lpage>
          <pub-id pub-id-type="doi">10.1016/j.cmpb.2015.10.013</pub-id>
          <pub-id pub-id-type="medline">26589468</pub-id>
          <pub-id pub-id-type="pii">S0169-2607(15)00273-4</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Cimino</surname>
              <given-names>JJ</given-names>
            </name>
          </person-group>
          <article-title>Clinicians' evaluation of computer-assisted medication summarization of electronic medical records</article-title>
          <source>Comput Biol Med</source>
          <year>2015</year>
          <month>04</month>
          <volume>59</volume>
          <fpage>221</fpage>
          <lpage>31</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/24393492"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.compbiomed.2013.12.006</pub-id>
          <pub-id pub-id-type="medline">24393492</pub-id>
          <pub-id pub-id-type="pii">S0010-4825(13)00357-0</pub-id>
          <pub-id pub-id-type="pmcid">PMC4063892</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cha</surname>
              <given-names>KH</given-names>
            </name>
            <name name-style="western">
              <surname>Hadjiiski</surname>
              <given-names>LM</given-names>
            </name>
            <name name-style="western">
              <surname>Cohan</surname>
              <given-names>RH</given-names>
            </name>
            <name name-style="western">
              <surname>Chan</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Caoili</surname>
              <given-names>EM</given-names>
            </name>
            <name name-style="western">
              <surname>Davenport</surname>
              <given-names>MS</given-names>
            </name>
            <name name-style="western">
              <surname>Samala</surname>
              <given-names>RK</given-names>
            </name>
            <name name-style="western">
              <surname>Weizer</surname>
              <given-names>AZ</given-names>
            </name>
            <name name-style="western">
              <surname>Alva</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Kirova-Nedyalkova</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Shampain</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Meyer</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Barkmeier</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Woolen</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Shankar</surname>
              <given-names>PR</given-names>
            </name>
            <name name-style="western">
              <surname>Francis</surname>
              <given-names>IR</given-names>
            </name>
            <name name-style="western">
              <surname>Palmbos</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Diagnostic accuracy of CT for prediction of bladder cancer treatment response with and without computerized decision support</article-title>
          <source>Acad Radiol</source>
          <year>2019</year>
          <month>09</month>
          <volume>26</volume>
          <issue>9</issue>
          <fpage>1137</fpage>
          <lpage>45</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/30424999"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.acra.2018.10.010</pub-id>
          <pub-id pub-id-type="medline">30424999</pub-id>
          <pub-id pub-id-type="pii">S1076-6332(18)30474-4</pub-id>
          <pub-id pub-id-type="pmcid">PMC6510656</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kostopoulou</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Rosen</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Round</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Wright</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Douiri</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Delaney</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Early diagnostic suggestions improve accuracy of GPs: a randomised controlled trial using computer-simulated patients</article-title>
          <source>Br J Gen Pract</source>
          <year>2015</year>
          <month>01</month>
          <volume>65</volume>
          <issue>630</issue>
          <fpage>49</fpage>
          <lpage>54</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://bjgp.org/cgi/pmidlookup?view=long&#38;pmid=25548316"/>
          </comment>
          <pub-id pub-id-type="doi">10.3399/bjgp15X683161</pub-id>
          <pub-id pub-id-type="medline">25548316</pub-id>
          <pub-id pub-id-type="pii">65/630/e49</pub-id>
          <pub-id pub-id-type="pmcid">PMC4276007</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Verdoorn</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kwint</surname>
              <given-names>HF</given-names>
            </name>
            <name name-style="western">
              <surname>Hoogland</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Gussekloo</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Bouvy</surname>
              <given-names>ML</given-names>
            </name>
          </person-group>
          <article-title>Drug-related problems identified during medication review before and after the introduction of a clinical decision support system</article-title>
          <source>J Clin Pharm Ther</source>
          <year>2018</year>
          <month>04</month>
          <volume>43</volume>
          <issue>2</issue>
          <fpage>224</fpage>
          <lpage>31</lpage>
          <pub-id pub-id-type="doi">10.1111/jcpt.12637</pub-id>
          <pub-id pub-id-type="medline">28971492</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Whitney</surname>
              <given-names>CW</given-names>
            </name>
            <name name-style="western">
              <surname>Gottlieb</surname>
              <given-names>DJ</given-names>
            </name>
            <name name-style="western">
              <surname>Redline</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Norman</surname>
              <given-names>RG</given-names>
            </name>
            <name name-style="western">
              <surname>Dodge</surname>
              <given-names>RR</given-names>
            </name>
            <name name-style="western">
              <surname>Shahar</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Surovec</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Nieto</surname>
              <given-names>FJ</given-names>
            </name>
          </person-group>
          <article-title>Reliability of scoring respiratory disturbance indices and sleep staging</article-title>
          <source>Sleep</source>
          <year>1998</year>
          <month>11</month>
          <day>01</day>
          <volume>21</volume>
          <issue>7</issue>
          <fpage>749</fpage>
          <lpage>57</lpage>
          <pub-id pub-id-type="doi">10.1093/sleep/21.7.749</pub-id>
          <pub-id pub-id-type="medline">11286351</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cohen</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>A coefficient of agreement for nominal scales</article-title>
          <source>Educ Psychol Meas</source>
          <year>1960</year>
          <month>04</month>
          <volume>20</volume>
          <issue>1</issue>
          <fpage>37</fpage>
          <lpage>46</lpage>
          <pub-id pub-id-type="doi">10.1177/001316446002000104</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hoff</surname>
              <given-names>KA</given-names>
            </name>
            <name name-style="western">
              <surname>Bashir</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Trust in automation: integrating empirical evidence on factors that influence trust</article-title>
          <source>Hum Factors</source>
          <year>2015</year>
          <month>05</month>
          <volume>57</volume>
          <issue>3</issue>
          <fpage>407</fpage>
          <lpage>34</lpage>
          <pub-id pub-id-type="doi">10.1177/0018720814547570</pub-id>
          <pub-id pub-id-type="medline">25875432</pub-id>
          <pub-id pub-id-type="pii">0018720814547570</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wilcoxon</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Individual comparisons by ranking methods</article-title>
          <source>Breakthroughs in Statistics</source>
          <year>1992</year>
          <publisher-loc>New York</publisher-loc>
          <publisher-name>Springer</publisher-name>
          <fpage>196</fpage>
          <lpage>202</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>MT</given-names>
            </name>
            <name name-style="western">
              <surname>He</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Rosner</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Yan</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Wilcoxon rank-based tests for clustered data with R package clusrank</article-title>
          <source>J Stat Soft</source>
          <year>2020</year>
          <volume>96</volume>
          <issue>6</issue>
          <fpage>1</fpage>
          <lpage>26</lpage>
          <pub-id pub-id-type="doi">10.18637/jss.v096.i06</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rosner</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Glynn</surname>
              <given-names>RJ</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>MT</given-names>
            </name>
          </person-group>
          <article-title>The Wilcoxon signed rank test for paired comparisons of clustered data</article-title>
          <source>Biometrics</source>
          <year>2006</year>
          <month>03</month>
          <volume>62</volume>
          <issue>1</issue>
          <fpage>185</fpage>
          <lpage>92</lpage>
          <pub-id pub-id-type="doi">10.1111/j.1541-0420.2005.00389.x</pub-id>
          <pub-id pub-id-type="medline">16542245</pub-id>
          <pub-id pub-id-type="pii">BIOM389</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref44">
        <label>44</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Datta</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Satten</surname>
              <given-names>GA</given-names>
            </name>
          </person-group>
          <article-title>A signed-rank test for clustered data</article-title>
          <source>Biometrics</source>
          <year>2008</year>
          <month>06</month>
          <volume>64</volume>
          <issue>2</issue>
          <fpage>501</fpage>
          <lpage>7</lpage>
          <pub-id pub-id-type="doi">10.1111/j.1541-0420.2007.00923.x</pub-id>
          <pub-id pub-id-type="medline">17970820</pub-id>
          <pub-id pub-id-type="pii">BIOM923</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref45">
        <label>45</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kerby</surname>
              <given-names>DS</given-names>
            </name>
          </person-group>
          <article-title>The simple difference formula: an approach to teaching nonparametric correlation</article-title>
          <source>Compreh Psychol</source>
          <year>2014</year>
          <month>02</month>
          <day>14</day>
          <volume>3</volume>
          <pub-id pub-id-type="doi">10.2466/11.it.3.1</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref46">
        <label>46</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Khairat</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Marc</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Crosby</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Al Sanousi</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Reasons for physicians not adopting clinical decision support systems: critical analysis</article-title>
          <source>JMIR Med Inform</source>
          <year>2018</year>
          <month>04</month>
          <day>18</day>
          <volume>6</volume>
          <issue>2</issue>
          <fpage>e24</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medinform.jmir.org/2018/2/e24/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/medinform.8912</pub-id>
          <pub-id pub-id-type="medline">29669706</pub-id>
          <pub-id pub-id-type="pii">v6i2e24</pub-id>
          <pub-id pub-id-type="pmcid">PMC5932331</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref47">
        <label>47</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cai</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Reif</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Hegde</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Hipp</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Smilkov</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Wattenberg</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Viegas</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Corrado</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Stumpe</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Terry</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Human-centered tools for coping with imperfect algorithms during medical decision-making</article-title>
          <source>Proceedings of the 2019 CHI Conference on Human Factors in Computing Systems</source>
          <year>2019</year>
          <conf-name>CHI '19: CHI Conference on Human Factors in Computing Systems</conf-name>
          <conf-date>May 4 - 9, 2019</conf-date>
          <conf-loc>Glasgow Scotland UK</conf-loc>
          <fpage>1</fpage>
          <lpage>14</lpage>
          <pub-id pub-id-type="doi">10.1145/3290605.3300234</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref48">
        <label>48</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Heer</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Agency plus automation: designing artificial intelligence into interactive systems</article-title>
          <source>Proc Natl Acad Sci U S A</source>
          <year>2019</year>
          <month>02</month>
          <day>05</day>
          <volume>116</volume>
          <issue>6</issue>
          <fpage>1844</fpage>
          <lpage>50</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.pnas.org/cgi/pmidlookup?view=long&#38;pmid=30718389"/>
          </comment>
          <pub-id pub-id-type="doi">10.1073/pnas.1807184115</pub-id>
          <pub-id pub-id-type="medline">30718389</pub-id>
          <pub-id pub-id-type="pii">1807184115</pub-id>
          <pub-id pub-id-type="pmcid">PMC6369770</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref49">
        <label>49</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Combrisson</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Vallat</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Eichenlaub</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>O'Reilly</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Lajnef</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Guillot</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Ruby</surname>
              <given-names>PM</given-names>
            </name>
            <name name-style="western">
              <surname>Jerbi</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Sleep: an open-source python software for visualization, analysis, and staging of sleep data</article-title>
          <source>Front Neuroinform</source>
          <year>2017</year>
          <volume>11</volume>
          <fpage>60</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.3389/fninf.2017.00060"/>
          </comment>
          <pub-id pub-id-type="doi">10.3389/fninf.2017.00060</pub-id>
          <pub-id pub-id-type="medline">28983246</pub-id>
          <pub-id pub-id-type="pmcid">PMC5613192</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref50">
        <label>50</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Landis</surname>
              <given-names>JR</given-names>
            </name>
            <name name-style="western">
              <surname>Koch</surname>
              <given-names>GG</given-names>
            </name>
          </person-group>
          <article-title>The measurement of observer agreement for categorical data</article-title>
          <source>Biometrics</source>
          <year>1977</year>
          <month>03</month>
          <volume>33</volume>
          <issue>1</issue>
          <fpage>159</fpage>
          <lpage>74</lpage>
          <pub-id pub-id-type="medline">843571</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref51">
        <label>51</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Treisman</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Gelade</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>A feature-integration theory of attention</article-title>
          <source>Cogn Psychol</source>
          <year>1980</year>
          <month>01</month>
          <volume>12</volume>
          <issue>1</issue>
          <fpage>97</fpage>
          <lpage>136</lpage>
          <pub-id pub-id-type="doi">10.1016/0010-0285(80)90005-5</pub-id>
          <pub-id pub-id-type="medline">7351125</pub-id>
          <pub-id pub-id-type="pii">0010-0285(80)90005-5</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref52">
        <label>52</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ashcraft</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Radvansky</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <source>Cognition. 6th Ed</source>
          <year>2014</year>
          <publisher-loc>Upper Saddle River, NJ</publisher-loc>
          <publisher-name>Pearson Education</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref53">
        <label>53</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dietvorst</surname>
              <given-names>BJ</given-names>
            </name>
            <name name-style="western">
              <surname>Simmons</surname>
              <given-names>JP</given-names>
            </name>
            <name name-style="western">
              <surname>Massey</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Algorithm aversion: people erroneously avoid algorithms after seeing them err</article-title>
          <source>J Exp Psychol Gen</source>
          <year>2015</year>
          <month>02</month>
          <volume>144</volume>
          <issue>1</issue>
          <fpage>114</fpage>
          <lpage>26</lpage>
          <pub-id pub-id-type="doi">10.1037/xge0000033</pub-id>
          <pub-id pub-id-type="medline">25401381</pub-id>
          <pub-id pub-id-type="pii">2014-48748-001</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref54">
        <label>54</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lyell</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Magrabi</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Coiera</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Reduced verification of medication alerts increases prescribing errors</article-title>
          <source>Appl Clin Inform</source>
          <year>2019</year>
          <month>01</month>
          <volume>10</volume>
          <issue>1</issue>
          <fpage>66</fpage>
          <lpage>76</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/30699458"/>
          </comment>
          <pub-id pub-id-type="doi">10.1055/s-0038-1677009</pub-id>
          <pub-id pub-id-type="medline">30699458</pub-id>
          <pub-id pub-id-type="pmcid">PMC6353646</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref55">
        <label>55</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Guo</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Pleiss</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Weinberger</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>On calibration of modern neural networks</article-title>
          <source>arXiv</source>
          <year>2017</year>
          <access-date>2021-12-29</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/pdf/1706.04599.pdf">https://arxiv.org/pdf/1706.04599.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref56">
        <label>56</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Parekh</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Selesnick</surname>
              <given-names>IW</given-names>
            </name>
            <name name-style="western">
              <surname>Rapoport</surname>
              <given-names>DM</given-names>
            </name>
            <name name-style="western">
              <surname>Ayappa</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>Detection of K-complexes and sleep spindles (DETOKS) using sparse optimization</article-title>
          <source>J Neurosci Methods</source>
          <year>2015</year>
          <month>08</month>
          <day>15</day>
          <volume>251</volume>
          <fpage>37</fpage>
          <lpage>46</lpage>
          <pub-id pub-id-type="doi">10.1016/j.jneumeth.2015.04.006</pub-id>
          <pub-id pub-id-type="medline">25956566</pub-id>
          <pub-id pub-id-type="pii">S0165-0270(15)00153-3</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref57">
        <label>57</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bremer</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Smith</surname>
              <given-names>JR</given-names>
            </name>
            <name name-style="western">
              <surname>Karacan</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>Automatic detection of the K-complex in sleep electroencephalograms</article-title>
          <source>IEEE Trans Biomed Eng</source>
          <year>1970</year>
          <month>10</month>
          <volume>17</volume>
          <issue>4</issue>
          <fpage>314</fpage>
          <lpage>23</lpage>
          <pub-id pub-id-type="doi">10.1109/tbme.1970.4502759</pub-id>
          <pub-id pub-id-type="medline">5518827</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref58">
        <label>58</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Warby</surname>
              <given-names>SC</given-names>
            </name>
            <name name-style="western">
              <surname>Wendt</surname>
              <given-names>SL</given-names>
            </name>
            <name name-style="western">
              <surname>Welinder</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Munk</surname>
              <given-names>EG</given-names>
            </name>
            <name name-style="western">
              <surname>Carrillo</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Sorensen</surname>
              <given-names>HB</given-names>
            </name>
            <name name-style="western">
              <surname>Jennum</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Peppard</surname>
              <given-names>PE</given-names>
            </name>
            <name name-style="western">
              <surname>Perona</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Mignot</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Sleep-spindle detection: crowdsourcing and evaluating performance of experts, non-experts and automated methods</article-title>
          <source>Nat Methods</source>
          <year>2014</year>
          <month>04</month>
          <volume>11</volume>
          <issue>4</issue>
          <fpage>385</fpage>
          <lpage>92</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/24562424"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/nmeth.2855</pub-id>
          <pub-id pub-id-type="medline">24562424</pub-id>
          <pub-id pub-id-type="pii">nmeth.2855</pub-id>
          <pub-id pub-id-type="pmcid">PMC3972193</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref59">
        <label>59</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Xie</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Kao</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Gao</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>X</given-names>
            </name>
          </person-group>
          <article-title>CheXplain: enabling physicians to explore and understand data-driven, AI-enabled medical imaging analysis</article-title>
          <source>Proceedings of the 2020 CHI Conference on Human Factors in Computing Systems</source>
          <year>2020</year>
          <conf-name>CHI '20: CHI Conference on Human Factors in Computing Systems</conf-name>
          <conf-date>April 25 - 30, 2020</conf-date>
          <conf-loc>Honolulu HI USA</conf-loc>
          <fpage>1</fpage>
          <lpage>13</lpage>
          <pub-id pub-id-type="doi">10.1145/3313831.3376807</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref60">
        <label>60</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hart</surname>
              <given-names>SG</given-names>
            </name>
          </person-group>
          <article-title>Nasa-Task Load Index (NASA-TLX); 20 years later</article-title>
          <source>Proc Hum Factors Ergon Soc Annu Meet</source>
          <year>2006</year>
          <month>10</month>
          <volume>50</volume>
          <issue>9</issue>
          <fpage>904</fpage>
          <lpage>8</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://pro.sagepub.com/content/50/9/904.full.pdf+html"/>
          </comment>
          <pub-id pub-id-type="doi">10.1177/154193120605000909</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
