<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="review-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id><journal-id journal-id-type="publisher-id">jmir</journal-id><journal-id journal-id-type="index">1</journal-id><journal-title>Journal of Medical Internet Research</journal-title><abbrev-journal-title>J Med Internet Res</abbrev-journal-title><issn pub-type="epub">1438-8871</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v28i1e70417</article-id><article-id pub-id-type="doi">10.2196/70417</article-id><article-categories><subj-group subj-group-type="heading"><subject>Review</subject></subj-group></article-categories><title-group><article-title>Innovations in Deaf Health Care Communication: Systematic Review of Sign Language Recognition Systems</article-title></title-group><contrib-group><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Marcolino</surname><given-names>Milena Soriano</given-names></name><degrees>MD, PhD</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff2">2</xref><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Oliveira</surname><given-names>Lucca Fagundes Ramos de</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff2">2</xref><xref ref-type="aff" rid="aff4">4</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Valle</surname><given-names>Lucas Rocha</given-names></name><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" 
rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Rosa</surname><given-names>Luiza Marinho Motta Santa</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff2">2</xref><xref ref-type="aff" rid="aff5">5</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Sanches</surname><given-names>Gabriela Teodora de Souza</given-names></name><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Santos</surname><given-names>Natalia Sales</given-names></name><degrees>MSc</degrees><xref ref-type="aff" rid="aff6">6</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Costa</surname><given-names>Michelle Ralil da</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Bernardino</surname><given-names>Elidea Lucia Almeida</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff7">7</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Cordeiro</surname><given-names>Raniere Alislan Almeida</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff7">7</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Prates</surname><given-names>Raquel Oliveira</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff6">6</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Reis</surname><given-names>Zilma Silveira Nogueira</given-names></name><degrees>MD, PhD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Campos</surname><given-names>Mario Fernando Montenegro</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff6">6</xref></contrib></contrib-group><aff 
id="aff1"><institution>Medical School, Universidade Federal de Minas Gerais</institution><addr-line>Belo Horizonte</addr-line><country>Brazil</country></aff><aff id="aff2"><institution>Telehealth Center, University Hospital, Universidade Federal de Minas Gerais</institution><addr-line>Avenida Professor Alfredo Balena, 190, Santa Efig&#x00EA;nia</addr-line><addr-line>Belo Horizonte</addr-line><country>Brazil</country></aff><aff id="aff3"><institution>Institute for Health Technology Assessment</institution><addr-line>Porto Alegre</addr-line><country>Brazil</country></aff><aff id="aff4"><institution>Medical School, Universidade Federal de Ouro Preto</institution><addr-line>Ouro Preto</addr-line><country>Brazil</country></aff><aff id="aff5"><institution>Faculdade de Ci&#x00EA;ncias M&#x00E9;dicas de Minas Gerais</institution><addr-line>Belo Horizonte</addr-line><country>Brazil</country></aff><aff id="aff6"><institution>Department of Computer Science, Universidade Federal de Minas Gerais</institution><addr-line>Belo Horizonte</addr-line><country>Brazil</country></aff><aff id="aff7"><institution>Faculty of Arts and Sciences, Universidade Federal de Minas Gerais</institution><addr-line>Belo Horizonte</addr-line><country>Brazil</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Li</surname><given-names>Yike</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Holzinger</surname><given-names>Andreas</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Singh</surname><given-names>Sudhakar</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Yang</surname><given-names>Zhixiong</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Milena Soriano Marcolino, MD, PhD, Telehealth Center, University Hospital, Universidade Federal de Minas 
Gerais, Avenida Professor Alfredo Balena, 190, Santa Efig&#x00EA;nia, Belo Horizonte, CEP 30130-100, Brazil, +55 31 3307 9201; <email>milenamarc@gmail.com</email></corresp></author-notes><pub-date pub-type="collection"><year>2026</year></pub-date><pub-date pub-type="epub"><day>9</day><month>4</month><year>2026</year></pub-date><volume>28</volume><elocation-id>e70417</elocation-id><history><date date-type="received"><day>22</day><month>12</month><year>2024</year></date><date date-type="rev-recd"><day>03</day><month>08</month><year>2025</year></date><date date-type="accepted"><day>05</day><month>08</month><year>2025</year></date></history><copyright-statement>&#x00A9; Milena Soriano Marcolino, Lucca Fagundes Ramos de Oliveira, Lucas Rocha Valle, Luiza Marinho Motta Santa Rosa, Gabriela Teodora de Souza Sanches, Natalia Sales Santos, Michelle Ralil da Costa, Elidea Lucia Almeida Bernardino, Raniere Alislan Almeida Cordeiro, Raquel Oliveira Prates, Zilma Silveira Nogueira Reis, Mario Fernando Montenegro Campos. Originally published in the Journal of Medical Internet Research (<ext-link ext-link-type="uri" xlink:href="https://www.jmir.org">https://www.jmir.org</ext-link>), 9.4.2026. </copyright-statement><copyright-year>2026</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research (ISSN 1438-8871), is properly cited. 
The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://www.jmir.org/">https://www.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://www.jmir.org/2026/1/e70417"/><abstract><sec><title>Background</title><p>Deaf individuals often face communication challenges when interacting with those who can hear. Within health care settings, these challenges may pose risks to their safety, potentially resulting in misdiagnoses, treatment errors, and decreased quality of care.</p></sec><sec><title>Objective</title><p>This study aims to systematically review the evidence on communication systems reported in the literature that use human-computer interaction techniques to support communication between deaf individuals who use sign language and hearing health professionals in health care settings. The review focuses on systems that are either currently in use or proposed for use in health care and that have been tested using human participants or videos of human users.</p></sec><sec sec-type="methods"><title>Methods</title><p>A comprehensive search was performed via MEDLINE, Web of Science, ACM, IEEE Xplore, Scopus, and Google Scholar in March 2025. The inclusion criteria comprised studies developing a sign language recognition system within a health care context and testing with human users. Eligible studies underwent screening by 2 independent investigators (LRV and LMMSR or LFRdO and GTdSS), with any disagreements resolved by a senior researcher (MSM).</p></sec><sec sec-type="results"><title>Results</title><p>The search retrieved 21,778 publications, and screening of reference lists identified 2 additional studies, resulting in a total of 23 studies meeting the eligibility criteria. Most systems (15/23, 65.2%) were image-based, while 34.8% (8/23) relied on sensors (glove-based or depth-sensing). 
Applications varied across health care settings, including general hospital care (10/23, 43.5%), emergencies (8/23, 34.8%), and primary care (4/23, 17.4%). All systems were in the development and testing stage, with no data on security and psychological impacts. Accuracy ranged from 25% to 100% for image-based and 72% to 99.7% for sensor-based systems. Bidirectionality and facial expression recognition, crucial for effective communication, were largely overlooked.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>Image-based systems were more common than sensor-based ones, though both showed wide variability in accuracy in recognizing and interpreting signs. Most systems failed to address critical aspects such as bidirectional communication and the recognition of facial expressions, essential for effective communication. None fully addresses the requirements for integration into health care settings. These findings highlight the need for further research on implementation, usability, and impact on the quality of care for deaf patients.</p></sec><sec sec-type="registered-report"><title>International Registered Report Identifier (IRRID)</title><p>RR2-10.2196/55427</p></sec></abstract><kwd-group><kwd>computer neural network</kwd><kwd>artificial intelligence</kwd><kwd>biomedical technology</kwd><kwd>sign language</kwd><kwd>hearing loss</kwd><kwd>deafness</kwd><kwd>communication barriers</kwd><kwd>gestures</kwd><kwd>PRISMA</kwd><kwd>Preferred Reporting Items for Systematic Reviews and Meta-Analyses</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>Sign languages are a form of communication characterized by the successful coordination of gestures, body, head, and hand motions, with facial expressions. 
They are naturally developed and highly structured systems, governed by a set of linguistic rules, different from spoken languages with no standardized written forms [<xref ref-type="bibr" rid="ref1">1</xref>]. They enable comprehensive and rich linguistic systems for formulating words and phrases, making them the primary mode of communication for many hearing-impaired people [<xref ref-type="bibr" rid="ref2">2</xref>]. According to the World Health Organization, over 430 million people worldwide experience disabling hearing loss, a number expected to exceed 700 million by 2050 [<xref ref-type="bibr" rid="ref3">3</xref>]. Despite the World Federation of the Deaf estimating that there are over 200 sign languages and over 70 million deaf individuals who use them, reliable data on sign language users remains scarce [<xref ref-type="bibr" rid="ref4">4</xref>].</p><p>Even though sign language is the primary mode of communication for millions of deaf individuals, they still encounter significant barriers in daily interactions, particularly in critical areas such as health care, education, and public services, where most professionals do not understand or use sign language [<xref ref-type="bibr" rid="ref5">5</xref>-<xref ref-type="bibr" rid="ref7">7</xref>]. Specifically in health care, this communication gap can lead to misdiagnoses, inadequate treatment, and reduced quality of care [<xref ref-type="bibr" rid="ref8">8</xref>].</p><p>In response to this challenge, and recognizing the significance of sign language in fostering social integration for the deaf community, researchers have developed sign language recognition (SLR) systems. SLR refers to the identification and interpretation of sign language gestures and movements [<xref ref-type="bibr" rid="ref9">9</xref>]. 
These systems aim to facilitate real-time translation between sign and spoken languages, to build a communication bridge between deaf individuals who communicate through sign language and ordinary people, and to enhance accessibility and inclusion across multiple domains, including health care [<xref ref-type="bibr" rid="ref10">10</xref>]. In this pursuit, machine learning and other artificial intelligence (AI) techniques have become important emerging tools to help overcome communication barriers, enhancing not only accessibility but also valuing the cultural identity of the deaf community.</p><p>Regarding automatic SLR systems, it is crucial to highlight the use of advanced algorithms and other AI techniques to interpret the complex gestures and movements inherent to sign language communication. With extensive use of advanced computer vision algorithms and machine learning techniques, these systems are becoming increasingly able to automatically recognize and translate signs into text of the target language, thereby facilitating effective communication between deaf individuals and those who do not understand sign language [<xref ref-type="bibr" rid="ref11">11</xref>-<xref ref-type="bibr" rid="ref13">13</xref>].</p><p>The SLR systems comprise different types of methodologies and technologies to recognize and translate sign language. In this paper, we organize them into 2 main categories: image-based and sensor-based. Even though strictly speaking, cameras are sensors, we used sensor-based to identify the approaches that use other sensor modalities. Image-based approaches use computer vision techniques coupled with deep learning models to analyze video streams from cameras that capture hand gestures, body movements, and facial expressions to accurately detect and interpret signs [<xref ref-type="bibr" rid="ref11">11</xref>,<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref14">14</xref>]. 
Sensor-based systems use wearable devices equipped with sensors, such as accelerometers and gyroscopes, to capture hand and body movements and translate signs into written or spoken language [<xref ref-type="bibr" rid="ref11">11</xref>,<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref15">15</xref>]. These systems can be divided into 2 subgroups: glove-based systems and depth-sensor systems. Glove-based systems use specialized gloves embedded with sensors to capture fine-grained hand movements, enabling real-time translation into spoken or written language. In contrast, depth-sensor systems rely on depth-sensing cameras, such as Kinect or light detection and ranging, to obtain 3D information about hand and body movements. By analyzing the 3D pose of different body parts, these systems allow more precise recognition and interpretation of sign language gestures [<xref ref-type="bibr" rid="ref16">16</xref>].</p><p>Each of the aforementioned types of systems has strengths and limitations concerning accuracy in detecting and interpreting signs, portability, cost, and accessibility. Additionally, user-friendliness is an important consideration in the development and implementation of SLR systems. It is critical for ensuring accessibility and inclusion for deaf individuals. As these systems are intended to be used by people with varying levels of technical expertise, their design must prioritize simplicity, intuitiveness, and ease of use [<xref ref-type="bibr" rid="ref17">17</xref>]. This is particularly true in health care settings, where effective communication with medical staff is essential. 
Furthermore, given the sensitive nature of health care interactions, it is crucial to assess how these systems address ethical concerns, such as patient data privacy, and practical challenges related to accessibility, including cost and implementation feasibility in diverse health care settings.</p><p>Therefore, this study aims to systematically review the evidence on the translation systems developed for deaf people who communicate through sign language with hearing health professionals in a health care context, which are already in use or proposed for use and have been tested with human users or videos of human users. The main research question was: What technologies have been developed and tested in real-world settings to translate sign and oral languages, facilitating communication between deaf patients who primarily use sign language and health care workers? The specific questions are as follows:</p><list list-type="bullet"><list-item><p>In which context of health care have these technologies been used?</p></list-item><list-item><p>Which languages (sign and oral) can these technologies translate?</p></list-item><list-item><p>Which technologies are required for it to be used on-site? 
How were they developed?</p></list-item><list-item><p>How were they deployed and tested?</p></list-item><list-item><p>How has the communication between health care workers and deaf people been improved by using these technologies?</p></list-item><list-item><p>How was the efficacy of these technologies evaluated?</p></list-item><list-item><p>Is the system or technology &#x201C;bidirectionally&#x201D; interactive?</p></list-item><list-item><p>How do these systems address ethical concerns in health care settings, such as patient privacy and data security?</p></list-item></list></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Overview</title><p>The research protocol was registered in the Open Science Framework and previously published in detail [<xref ref-type="bibr" rid="ref18">18</xref>,<xref ref-type="bibr" rid="ref19">19</xref>]. It followed guidance from the Cochrane Guidelines and the PRISMA (Preferred Reporting Items for Systematic Reviews and Meta-Analyses) statement [<xref ref-type="bibr" rid="ref20">20</xref>,<xref ref-type="bibr" rid="ref21">21</xref>] (<xref ref-type="supplementary-material" rid="app7">Checklist 1</xref>). A multidisciplinary team comprising researchers from health and computing domains, along with linguistic specialists in sign language, collaboratively conducted the systematic review. Two members of the team are linguistic specialists and sign language researchers, one of them is deaf.</p></sec><sec id="s2-2"><title>Search Strategy</title><p>Independent researchers performed a literature search using Web of Science, MEDLINE, IEEE Xplore, ACM, Scopus, and Google Scholar. A preliminary search strategy, developed by 5 authors (MSM, LFRO, LRV, ROP, and ZSNR), incorporated MeSH and defined text words relevant to the topic (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). 
The last search was conducted on March 27, 2025.</p><p>All studies, regardless of publication date or language, from inception onward were considered. In the case of unpublished studies, the authors were contacted at least 3 times via email or other social networks used by researchers to request additional information. Reference lists of eligible studies were examined to identify additional eligible studies.</p></sec><sec id="s2-3"><title>Study Selection</title><p>Prospective, retrospective, or descriptive studies that address the development of communication systems specifically designed for deaf individuals in health care encounters and that involve testing with human users or videos of human users were included. Human users could be people of any age who are deaf and whose primary communication modality is sign language.</p><p>Studies that do not address the specified research questions, do not mention testing with human users or videos of human users, or do not mention use in health care encounter contexts were excluded. Short communications, conference abstracts, and correspondences were not excluded.</p><p>Independent researchers blindly screened the studies. Titles and abstracts of identified studies were individually reviewed to assess eligibility. Full-text versions of papers that were not excluded at this initial stage were read for a thorough examination. Subsequently, potentially pertinent studies were independently evaluated to ascertain if they aligned with the inclusion criteria. Any disagreements were resolved by a senior researcher (MSM). 
Whenever necessary, corresponding authors were contacted to obtain data not included in the publication using email and ResearchGate.</p><p>Search data for the identified studies and information for each stage of study selection were registered in detail, following the guidelines of the PRISMA methodology [<xref ref-type="bibr" rid="ref20">20</xref>,<xref ref-type="bibr" rid="ref21">21</xref>].</p></sec><sec id="s2-4"><title>Data Extraction</title><p>A data extraction table was custom-designed for this study and, along with the data extraction process itself, was independently piloted by 2 researchers. The extraction was checked by 2 other reviewers, one researcher with a computer science background and a senior researcher with a health science background. Conflicts were resolved by consensus or by consulting a senior researcher (MSM). Details on the variables extracted were previously published [<xref ref-type="bibr" rid="ref18">18</xref>,<xref ref-type="bibr" rid="ref19">19</xref>].</p><p>Furthermore, the researchers responsible for each study included in the systematic review were contacted via email (with up to 3 attempts) and ResearchGate (at least 1 attempt) to obtain updates on the current status of their systems and to request any additional information regarding their application in real-world contexts.</p><p>The definitions used regarding the type of SLR system, the corpus formation, and the health context are available in <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>. To extract data from papers that met the eligibility criteria, the authors of the study developed a codebook with clear definitions for all variables to ensure consistent data collection (<xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref>).</p></sec><sec id="s2-5"><title>Data Analysis</title><p>A qualitative synthesis was performed to analyze the data, and a narrative synthesis of the evidence was conducted to provide an overview of the results. 
The results are summarized according to system types: image-based and sensor-based.</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title>Search Results and Study Selection</title><p>The search retrieved 21,778 publications, 2021 of which were duplicates. Most studies were excluded after title and abstract analysis (n=19,605). In total, 6 reports were unavailable: for 4, the corresponding authors did not respond despite repeated contact attempts, and 2 did not make any contact details available (<xref ref-type="supplementary-material" rid="app4">Multimedia Appendix 4</xref>), thus leaving 146 selected for full-text screening. Of those, 22 studies were selected for inclusion after applying the eligibility criteria. In 2 cases, publications by the same research groups were identified. The first case involved a deep learning&#x2013;based system for recognizing emergency gestures in Indian Sign Language to support communication with hearing-impaired individuals [<xref ref-type="bibr" rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref23">23</xref>]. The journal paper published in IEEE Access in 2022 represents an extended and more comprehensive version of the earlier conference paper. It includes additional models (such as 3D convolutional neural network [CNN] and You Only Look Once [YOLO] v5), a more detailed methodology, a larger dataset, and more robust performance metrics, including mean average precision, precision, and recall. 
Given its methodological completeness and broader evaluation, only the IEEE Access paper was included in this review, while the conference version was excluded to avoid duplication of data and analysis [<xref ref-type="bibr" rid="ref22">22</xref>].</p><p>The second case involved 2 publications from a Brazilian research group describing a system for recognizing Brazilian Sign Language (Libras) in the health context [<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref24">24</xref>]. The 2024 journal paper incorporates a more advanced architecture (multiple-stream versus 2-stream), enhanced performance results, and a more detailed methodology, while using the same dataset as the earlier work. Thus, only the 2024 version was included [<xref ref-type="bibr" rid="ref15">15</xref>].</p><p>Additionally, 2 studies were obtained from reference list screening, totaling 23 studies ultimately included in the review (<xref ref-type="fig" rid="figure1">Figure 1</xref>) [<xref ref-type="bibr" rid="ref12">12</xref>-<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref25">25</xref>-<xref ref-type="bibr" rid="ref42">42</xref>].</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>PRISMA (Preferred Reporting Items for Systematic Reviews and Meta-Analyses) flowchart [<xref ref-type="bibr" rid="ref21">21</xref>].</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="jmir_v28i1e70417_fig01.png"/></fig></sec><sec id="s3-2"><title>Characteristics of Included Studies</title><p>The main characteristics of the included studies are summarized in <xref ref-type="table" rid="table1">Tables 1</xref> and <xref ref-type="table" rid="table2">2</xref> and <xref ref-type="supplementary-material" rid="app5">Multimedia Appendix 5</xref> [<xref ref-type="bibr" rid="ref12">12</xref>-<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" 
rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref25">25</xref>-<xref ref-type="bibr" rid="ref42">42</xref>]. They were published in 18 journals and conferences from 2015 to 2024. All of them were published in English. In 15 studies, the system was classified as image-based [<xref ref-type="bibr" rid="ref13">13</xref>-<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref25">25</xref>-<xref ref-type="bibr" rid="ref28">28</xref>,<xref ref-type="bibr" rid="ref30">30</xref>,<xref ref-type="bibr" rid="ref31">31</xref>,<xref ref-type="bibr" rid="ref36">36</xref>-<xref ref-type="bibr" rid="ref39">39</xref>,<xref ref-type="bibr" rid="ref42">42</xref>], and in 7, it was sensor-based. Of these, 4 were depth-sensing [<xref ref-type="bibr" rid="ref29">29</xref>,<xref ref-type="bibr" rid="ref33">33</xref>,<xref ref-type="bibr" rid="ref35">35</xref>,<xref ref-type="bibr" rid="ref41">41</xref>], and 3 were glove-based [<xref ref-type="bibr" rid="ref32">32</xref>,<xref ref-type="bibr" rid="ref34">34</xref>,<xref ref-type="bibr" rid="ref40">40</xref>].</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Main architectural, functional, and technical characteristics of sign language recognition systems.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom"/><td align="left" valign="bottom">Languages involved (oral and sign)</td><td align="left" valign="bottom" colspan="2">Technology needed</td><td align="left" valign="bottom">Infrastructure needed</td></tr><tr><td align="left" valign="bottom"/><td align="left" valign="bottom"/><td align="left" valign="bottom">Hardware or software</td><td align="left" valign="bottom">Development technology<sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup></td><td align="left" valign="bottom"/></tr></thead><tbody><tr><td align="left" valign="top" colspan="5">Image-based</td></tr><tr><td align="left" 
valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Xia et al (2022) [<xref ref-type="bibr" rid="ref13">13</xref>]</td><td align="left" valign="top">Chinese and Chinese Sign Language</td><td align="left" valign="top">Kendryte K210, 8-megapixel camera, microphone arrays, operating system FreeRTOS, LabelImg, Autodesk Inventor</td><td align="left" valign="top">TensorFlow, Keras, Darknet</td><td align="left" valign="top">SLA<sup><xref ref-type="table-fn" rid="table1fn2">b</xref></sup> 3D printing technology and heart-speaker device</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Pikoulis et al (2022) [<xref ref-type="bibr" rid="ref14">14</xref>]</td><td align="left" valign="top">Greek and Greek Sign Language</td><td align="left" valign="top">No information</td><td align="left" valign="top">Sentence-BERT<sup><xref ref-type="table-fn" rid="table1fn3">c</xref></sup>, Google Mediapipe</td><td align="left" valign="top">Computer and camera</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>da Silva et al (2024) [<xref ref-type="bibr" rid="ref15">15</xref>]</td><td align="left" valign="top">Brazilian Portuguese and Brazilian Sign Language (Libras)</td><td align="left" valign="top">Intel Core i3 and all experiments<break/>were performed on Ubuntu 18.04 LTS<sup><xref ref-type="table-fn" rid="table1fn4">d</xref></sup></td><td align="left" valign="top">LSTM<sup><xref ref-type="table-fn" rid="table1fn5">e</xref></sup> network, OpenPose, Keras, cuDNN<sup><xref ref-type="table-fn" rid="table1fn6">f</xref></sup> 8.5</td><td align="left" valign="top">Computer and smartphone HD<sup><xref ref-type="table-fn" rid="table1fn7">g</xref></sup> camera</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Das et al (2023) 
[<xref ref-type="bibr" rid="ref25">25</xref>]</td><td align="left" valign="top">English and Indian Sign Language</td><td align="left" valign="top">No information</td><td align="left" valign="top">CNN<sup><xref ref-type="table-fn" rid="table1fn8">h</xref></sup>, BiLSTM<sup><xref ref-type="table-fn" rid="table1fn9">i</xref></sup></td><td align="left" valign="top">Computer and camera</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Ko et al (2019) [<xref ref-type="bibr" rid="ref26">26</xref>]</td><td align="left" valign="top">Korean and Korean Sign Language</td><td align="left" valign="top">No information</td><td align="left" valign="top">OpenPose, PyTorch</td><td align="left" valign="top">Computer and camera</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Barrientos-Villalta et al (2022) [<xref ref-type="bibr" rid="ref27">27</xref>]</td><td align="left" valign="top">Peruvian Spanish and Peruvian Sign Language</td><td align="left" valign="top">Google Cloud, Storage, Google Cloud Run</td><td align="left" valign="top">Google Mediapipe, LSTM neural network</td><td align="left" valign="top">Mobile devices and internet</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Ram&#x00ED;rez S&#x00E1;nchez et al (2021) [<xref ref-type="bibr" rid="ref28">28</xref>]</td><td align="left" valign="top">Spanish and Mexican Sign Language</td><td align="left" valign="top">Webcam</td><td align="left" valign="top">Google MediaPipe, OpenCV, CNN, HMM<sup><xref ref-type="table-fn" rid="table1fn10">j</xref></sup>, Viterbi algorithm</td><td align="left" valign="top">Webcam and computer</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Gandhi et al (2021) [<xref ref-type="bibr" 
rid="ref30">30</xref>]</td><td align="left" valign="top">English and Indian Sign Language</td><td align="left" valign="top">Mobile phones with their camera resolutions</td><td align="left" valign="top">CNN using Image Stacking (VGG-16<sup><xref ref-type="table-fn" rid="table1fn11">k</xref></sup> and Resnet50) CNN + LSTM and LSTM with OpenPose</td><td align="left" valign="top">Mobile phone and camera</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Uchil et al (2019) [<xref ref-type="bibr" rid="ref31">31</xref>]</td><td align="left" valign="top">English and Indian Sign Language</td><td align="left" valign="top">Intel Core i5 CPU<sup><xref ref-type="table-fn" rid="table1fn12">l</xref></sup> running macOS Mojave and Core i3 CPU running Windows 7, smartphone</td><td align="left" valign="top">OpenPose library, OpenCV</td><td align="left" valign="top">Mobile phone camera</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Areeb et al (2022) [<xref ref-type="bibr" rid="ref22">22</xref>]</td><td align="left" valign="top">English and Indian Sign Language</td><td align="left" valign="top">No information</td><td align="left" valign="top">3D CNN, pretrained VGG-16, LSTM (RNN-LSTM)<sup><xref ref-type="table-fn" rid="table1fn13">m</xref></sup> scheme, YOLO<sup><xref ref-type="table-fn" rid="table1fn14">n</xref></sup> v5</td><td align="left" valign="top">Computer and camera</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Adithya and Rajesh (2020) [<xref ref-type="bibr" rid="ref36">36</xref>]</td><td align="left" valign="top">English and Indian Sign Language</td><td align="left" valign="top">Digital camera</td><td align="left" valign="top">LSTM network</td><td align="left" valign="top">Computer and camera</td></tr><tr><td align="left" 
valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Ihsan et al (2024) [<xref ref-type="bibr" rid="ref37">37</xref>]</td><td align="left" valign="top">English and American Sign Language</td><td align="left" valign="top">Intel Core i5, Visual Studio Code</td><td align="left" valign="top">Pretrained CNN (MobileNetV2) model, BiLSTM model, Mediapipe, TensorFlow, Keras</td><td align="left" valign="top">Computer, camera, and smartphones</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Das et al (2024) [<xref ref-type="bibr" rid="ref38">38</xref>]</td><td align="left" valign="top">English and Indian Sign Language</td><td align="left" valign="top">No information</td><td align="left" valign="top">CNNs, LSTM, long-term recurrent convolutional network model</td><td align="left" valign="top">Computer and mobile phone camera</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Faisal et al (2023) [<xref ref-type="bibr" rid="ref39">39</xref>]</td><td align="left" valign="top">Arabic and Saudi Sign Language</td><td align="left" valign="top">Robot Operating System, IClone Pro, 3DXchange, Unity</td><td align="left" valign="top">Self-developed concise 3D graph convolutional network, time delay neural network model, DTW<sup><xref ref-type="table-fn" rid="table1fn15">o</xref></sup> algorithm, FastSpeech2, Kaldi toolkit, MediaPipe</td><td align="left" valign="top">Portable electronic devices or computers equipped with a camera</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Bellil et al (2024) [<xref ref-type="bibr" rid="ref42">42</xref>]</td><td align="left" valign="top">Arabic and Algerian Sign Language</td><td align="left" valign="top">iPhone XS MAX</td><td align="left" valign="top">1D-CNN, MediaPipe</td><td 
align="left" valign="top">Computer and smartphone</td></tr><tr><td align="left" valign="top" colspan="5">Sensor-based (depth-sensing)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Hisham and Hamouda (2019) [<xref ref-type="bibr" rid="ref29">29</xref>]</td><td align="left" valign="top">Egyptian Arabic and Egyptian Arabic Sign Language</td><td align="left" valign="top">Kinect SDK, Windows 7, Visual Basic.NET, C# and C++</td><td align="left" valign="top">Bayesian network, Ada-Boosting, DTW, and HMM</td><td align="left" valign="top">Computer and Microsoft Kinect sensor</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Sarhan et al (2015) [<xref ref-type="bibr" rid="ref33">33</xref>]</td><td align="left" valign="top">Arabic and Arabic Sign Language</td><td align="left" valign="top">No information</td><td align="left" valign="top">HMM</td><td align="left" valign="top">Computer and Microsoft Kinect sensor</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>S&#x00FC;zg&#x00FC;n et al (2015) [<xref ref-type="bibr" rid="ref35">35</xref>]</td><td align="left" valign="top">English and Turkish Sign Language</td><td align="left" valign="top">Personal computer and a touch display</td><td align="left" valign="top">DTW algorithm</td><td align="left" valign="top">Computer and Microsoft Kinect sensor</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Dewasurendra et al (2020) [<xref ref-type="bibr" rid="ref41">41</xref>]</td><td align="left" valign="top">Sinhala and Sri Lankan Sign Language</td><td align="left" valign="top">Webcam, mobile device with camera, microphone, GPS</td><td align="left" valign="top">EfficientNet-Lite0, Pillow (Python image processing library), MaryTTS 
Framework, CMU<sup><xref ref-type="table-fn" rid="table1fn16">p</xref></sup> Sphinx 4 toolkit (Sphinx4+ SphinxTrain) and natural language processing, TensorFlow</td><td align="left" valign="top">Kinect and Leap Motion Controller</td></tr><tr><td align="left" valign="top" colspan="5">Sensor-based (glove-based)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Deji Dere et al (2022) [<xref ref-type="bibr" rid="ref32">32</xref>]</td><td align="left" valign="top">English and American Sign Language</td><td align="left" valign="top">Arduino Nano 33 BLE, Edge Impulse software</td><td align="left" valign="top">1D-CNN model</td><td align="left" valign="top">Inertial Measurement Unit</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Guo et al (2023) [<xref ref-type="bibr" rid="ref34">34</xref>]</td><td align="left" valign="top">English and American Sign Language</td><td align="left" valign="top">INMO AIR with Android 10 Go</td><td align="left" valign="top">Android-MediaPipe, VOSK<sup><xref ref-type="table-fn" rid="table1fn17">q</xref></sup> library</td><td align="left" valign="top">Mobile phone and augmented reality glasses<sup><xref ref-type="table-fn" rid="table1fn18">r</xref></sup></td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Luqman and Mahmoud (2020) [<xref ref-type="bibr" rid="ref40">40</xref>]</td><td align="left" valign="top">Arabic and Arabic Sign Language</td><td align="left" valign="top">No information</td><td align="left" valign="top">KenLM (statistical language model), MADAMIRA<sup><xref ref-type="table-fn" rid="table1fn19">s</xref></sup> (morphological analyzer and disambiguation tool)</td><td align="left" valign="top">Computer and camera</td></tr><tr><td align="left" valign="top" colspan="5">Hybrid</td></tr><tr><td 
align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Sosa-Jim&#x00E9;nez et al (2022) [<xref ref-type="bibr" rid="ref12">12</xref>]</td><td align="left" valign="top">Mexican Spanish and Mexican Sign Language</td><td align="left" valign="top">Intel Core i7 running Windows 7, Intel Core i5 running Windows 8.1, Microsoft Visual Studio 2015</td><td align="left" valign="top">Markov models (probabilistic) and neural networks</td><td align="left" valign="top">Computer and Microsoft Kinect sensor</td></tr></tbody></table><table-wrap-foot><fn id="table1fn1"><p><sup>a</sup>Artificial intelligence and imaging processing.</p></fn><fn id="table1fn2"><p><sup>b</sup>SLA: stereolithography. </p></fn><fn id="table1fn3"><p><sup>c</sup>BERT: bidirectional encoder representations from transformers. </p></fn><fn id="table1fn4"><p><sup>d</sup>LTS: long-term support. </p></fn><fn id="table1fn5"><p><sup>e</sup>LSTM: long short-term memory.</p></fn><fn id="table1fn6"><p><sup>f</sup>cuDNN: CUDA deep neural network library. </p></fn><fn id="table1fn7"><p><sup>g</sup>HD: high definition. </p></fn><fn id="table1fn8"><p><sup>h</sup>CNN: convolutional neural network.</p></fn><fn id="table1fn9"><p><sup>i</sup>BiLSTM: bidirectional long short-term memory.</p></fn><fn id="table1fn10"><p><sup>j</sup>HMM: hidden Markov model.</p></fn><fn id="table1fn11"><p><sup>k</sup>VGG-16: visual geometry group 16 layers. </p></fn><fn id="table1fn12"><p><sup>l</sup>CPU: central processing unit. </p></fn><fn id="table1fn13"><p><sup>m</sup>RNN-LSTM: recurrent neural network with a long short-term memory.</p></fn><fn id="table1fn14"><p><sup>n</sup>YOLO: You Only Look Once.</p></fn><fn id="table1fn15"><p><sup>o</sup>DTW: dynamic time warping.</p></fn><fn id="table1fn16"><p><sup>p</sup>CMU: Carnegie Mellon University. </p></fn><fn id="table1fn17"><p><sup>q</sup>VOSK: offline speech recognition toolkit [<xref ref-type="bibr" rid="ref43">43</xref>]. 
</p></fn><fn id="table1fn18"><p><sup>r</sup>Mobile phones and augmented reality glasses: portable electronic devices that enable interactive digital experiences.</p></fn><fn id="table1fn19"><p><sup>s</sup>MADAMIRA: a system for morphological analysis and disambiguation of Arabic [<xref ref-type="bibr" rid="ref44">44</xref>]. </p></fn></table-wrap-foot></table-wrap><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Main characteristics of the recognition approaches of sign language recognition systems.</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom"/><td align="left" valign="bottom" colspan="3">Corpus formation</td><td align="left" valign="bottom">Captures facial expressions and body movement</td><td align="left" valign="bottom">Health context</td></tr><tr><td align="left" valign="bottom"/><td align="left" valign="bottom">Type</td><td align="left" valign="bottom">Isolated words</td><td align="left" valign="bottom">Sentences</td><td align="left" valign="bottom"/><td align="left" valign="bottom"/></tr></thead><tbody><tr><td align="left" valign="top" colspan="6">Image-based</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Xia et al (2022) [<xref ref-type="bibr" rid="ref13">13</xref>]</td><td align="left" valign="top">Isolated words</td><td align="char" char="." valign="top">19</td><td align="char" char="." 
valign="top">0</td><td align="left" valign="top">N/A<sup><xref ref-type="table-fn" rid="table2fn1">a</xref></sup></td><td align="left" valign="top">General hospital care</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Pikoulis et al (2022) [<xref ref-type="bibr" rid="ref14">14</xref>]</td><td align="left" valign="top">Isolated words and sentences</td><td align="left" valign="top">6319 words with 1374 of them being unique (excluding repetitions)</td><td align="left" valign="top">1029 simple sentences with 945 of them being unique (excluding repetitions).</td><td align="left" valign="top">N/A</td><td align="left" valign="top">Psychiatric interviews</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>da Silva et al (2024) [<xref ref-type="bibr" rid="ref15">15</xref>]</td><td align="left" valign="top">Isolated words</td><td align="char" char="." valign="top">50</td><td align="char" char="." valign="top">0</td><td align="left" valign="top">Yes</td><td align="left" valign="top">General hospital care</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Das et al (2023) [<xref ref-type="bibr" rid="ref25">25</xref>]</td><td align="left" valign="top">Isolated words</td><td align="char" char="." valign="top">8</td><td align="char" char="." valign="top">0</td><td align="left" valign="top">N/A</td><td align="left" valign="top">Emergency situations</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Ko et al (2019) [<xref ref-type="bibr" rid="ref26">26</xref>]</td><td align="left" valign="top">Isolated words and sentences</td><td align="char" char="." valign="top">419</td><td align="char" char="." 
valign="top">105</td><td align="left" valign="top">Yes</td><td align="left" valign="top">Emergency situations</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Barrientos-Villalta et al (2022) [<xref ref-type="bibr" rid="ref27">27</xref>]</td><td align="left" valign="top">Isolated words</td><td align="char" char="." valign="top">17</td><td align="char" char="." valign="top">0</td><td align="left" valign="top">Yes</td><td align="left" valign="top">General hospital care</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Ram&#x00ED;rez S&#x00E1;nchez et al (2021) [<xref ref-type="bibr" rid="ref28">28</xref>]</td><td align="left" valign="top">Isolated words and sentences</td><td align="char" char="." valign="top">49</td><td align="char" char="." valign="top">20</td><td align="left" valign="top">Yes</td><td align="left" valign="top">Primary care consultations</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Gandhi et al (2021) [<xref ref-type="bibr" rid="ref30">30</xref>]</td><td align="left" valign="top">Isolated words</td><td align="char" char="." valign="top">20</td><td align="char" char="." valign="top">0</td><td align="left" valign="top">Yes</td><td align="left" valign="top">General hospital care</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Uchil et al (2019) [<xref ref-type="bibr" rid="ref31">31</xref>]</td><td align="left" valign="top">Isolated words</td><td align="char" char="." valign="top">20</td><td align="char" char="." 
valign="top">0</td><td align="left" valign="top">Yes</td><td align="left" valign="top">General hospital care</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Areeb et al (2022) [<xref ref-type="bibr" rid="ref22">22</xref>]</td><td align="left" valign="top">Isolated words</td><td align="char" char="." valign="top">8</td><td align="char" char="." valign="top">0</td><td align="left" valign="top">N/A</td><td align="left" valign="top">Emergency situations</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Adithya and Rajesh (2020) [<xref ref-type="bibr" rid="ref36">36</xref>]</td><td align="left" valign="top">Isolated words</td><td align="char" char="." valign="top">8</td><td align="char" char="." valign="top">0</td><td align="left" valign="top">No</td><td align="left" valign="top">Emergency situations</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Ihsan et al (2024) [<xref ref-type="bibr" rid="ref37">37</xref>]</td><td align="left" valign="top">Isolated words</td><td align="char" char="." valign="top">30</td><td align="char" char="." valign="top">0</td><td align="left" valign="top">N/A</td><td align="left" valign="top">General hospital care</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Das et al (2024) [<xref ref-type="bibr" rid="ref38">38</xref>]</td><td align="left" valign="top">Isolated words</td><td align="char" char="." valign="top">6</td><td align="char" char="." 
valign="top">0</td><td align="left" valign="top">N/A</td><td align="left" valign="top">Primary care consultations</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Faisal et al (2023) [<xref ref-type="bibr" rid="ref39">39</xref>]</td><td align="left" valign="top">Isolated words</td><td align="char" char="." valign="top">293</td><td align="char" char="." valign="top">0</td><td align="left" valign="top">N/A</td><td align="left" valign="top">Primary care consultations</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Bellil et al (2024) [<xref ref-type="bibr" rid="ref42">42</xref>]</td><td align="left" valign="top">Isolated words</td><td align="char" char="." valign="top">10</td><td align="char" char="." valign="top">0</td><td align="left" valign="top">No</td><td align="left" valign="top">General hospital care</td></tr><tr><td align="left" valign="top" colspan="6">Sensor-based (depth-sensing)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Hisham and Hamouda (2019) [<xref ref-type="bibr" rid="ref29">29</xref>]</td><td align="left" valign="top">Isolated words</td><td align="char" char="." valign="top">42</td><td align="char" char="." valign="top">0</td><td align="left" valign="top">No</td><td align="left" valign="top">General hospital care</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Sarhan et al (2015) [<xref ref-type="bibr" rid="ref33">33</xref>]</td><td align="left" valign="top">Isolated words</td><td align="char" char="." valign="top">16</td><td align="char" char="." 
valign="top">0</td><td align="left" valign="top">No</td><td align="left" valign="top">Emergency consultations</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>S&#x00FC;zg&#x00FC;n et al (2015) [<xref ref-type="bibr" rid="ref35">35</xref>]</td><td align="left" valign="top">Isolated words</td><td align="char" char="." valign="top">33</td><td align="char" char="." valign="top">0</td><td align="left" valign="top">N/A</td><td align="left" valign="top">General hospital care</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Dewasurendra et al (2020) [<xref ref-type="bibr" rid="ref41">41</xref>]</td><td align="left" valign="top">Isolated words</td><td align="char" char="." valign="top">0</td><td align="left" valign="top">N/A</td><td align="left" valign="top">No</td><td align="left" valign="top">Emergency situations</td></tr><tr><td align="left" valign="top" colspan="6">Sensor-based (glove-based)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Deji Dere et al (2022) [<xref ref-type="bibr" rid="ref32">32</xref>]</td><td align="left" valign="top">Isolated words</td><td align="char" char="." valign="top">5</td><td align="char" char="." valign="top">0</td><td align="left" valign="top">No</td><td align="left" valign="top">Emergency situations</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Guo et al (2023) [<xref ref-type="bibr" rid="ref34">34</xref>]</td><td align="left" valign="top">Isolated words</td><td align="left" valign="top">More than 550</td><td align="char" char="." 
valign="top">0</td><td align="left" valign="top">N/A</td><td align="left" valign="top">Emergency situations</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Luqman and Mahmoud (2020) [<xref ref-type="bibr" rid="ref40">40</xref>]</td><td align="left" valign="top">Isolated words and sentences</td><td align="left" valign="top">3327 sign words and 30,296 singular words with their plurals</td><td align="char" char="." valign="top">600</td><td align="left" valign="top">Yes</td><td align="left" valign="top">General hospital care</td></tr><tr><td align="left" valign="top" colspan="6">Hybrid</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Sosa-Jim&#x00E9;nez et al (2022) [<xref ref-type="bibr" rid="ref12">12</xref>]</td><td align="left" valign="top">Isolated words</td><td align="char" char="." valign="top">43</td><td align="char" char="." valign="top">0</td><td align="left" valign="top">Yes</td><td align="left" valign="top">Primary care consultations</td></tr></tbody></table><table-wrap-foot><fn id="table2fn1"><p><sup>a</sup>N/A: not applicable.</p></fn></table-wrap-foot></table-wrap><p>The system developed by Sosa-Jim&#x00E9;nez et al [<xref ref-type="bibr" rid="ref12">12</xref>] is the only one classified as hybrid, as it leverages Kinect&#x2019;s red, green, blue, and depth sensors to recognize signs through both visual input and 3D skeletal tracking. 
Because it is the only hybrid system, it is presented separately in <xref ref-type="table" rid="table1">Table 1</xref> and <xref ref-type="supplementary-material" rid="app5">Multimedia Appendix 5</xref>; however, in the following sections, it will be discussed alongside the sensor-based systems (depth-sensing).</p></sec><sec id="s3-3"><title>Aspects Related to System Development</title><sec id="s3-3-1"><title>Languages Involved, Direction of Communication, and Capturing of Facial Expressions</title><sec id="s3-3-1-1"><title>Image-Based</title><p>The systems studied supported a range of spoken-sign language pairs, including Arabic (Algerian and Saudi), English, Chinese, Brazilian Portuguese, Greek, Indian, Korean, Peruvian, and Mexican Spanish, and their respective sign languages (<xref ref-type="table" rid="table2">Table 2</xref>). Only 3 papers successfully implemented a bidirectional communication between spoken and sign languages [<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref35">35</xref>,<xref ref-type="bibr" rid="ref39">39</xref>], while another study attempted bidirectionality but did not achieve it [<xref ref-type="bibr" rid="ref13">13</xref>].</p><p>Only 2 studies explicitly addressed structural and cultural differences between spoken and sign languages in their translation processes [<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref28">28</xref>]. 
Additionally, 7 systems incorporated the recognition of facial expressions, facial key points, and other nonmanual features (such as audio phonemes and body posture) to enhance translation accuracy and communication effectiveness [<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref26">26</xref>-<xref ref-type="bibr" rid="ref28">28</xref>,<xref ref-type="bibr" rid="ref30">30</xref>,<xref ref-type="bibr" rid="ref31">31</xref>,<xref ref-type="bibr" rid="ref39">39</xref>].</p></sec><sec id="s3-3-1-2"><title>Sensor-Based</title><p>These systems also supported multiple spoken-sign language pairs, including combinations involving Arabic (and its Egyptian variant), English, Turkish, Sinhala, and Mexican Spanish, along with their respective national sign languages (<xref ref-type="table" rid="table1">Table 1</xref>).</p><p>In total, 3 studies reported successful implementation of bidirectional communication between spoken and sign languages [<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref34">34</xref>,<xref ref-type="bibr" rid="ref41">41</xref>]. One study took linguistic differences into account by applying rule-based morphological and syntactic processing to accommodate features specific to Arabic Sign Language&#x2014;such as subject-initial word order, lack of inflection, and the use of spatial and visual cues&#x2014;thus enabling transformation into grammatically correct Arabic sentences [<xref ref-type="bibr" rid="ref40">40</xref>].</p><p>Two sensor-based systems also linked corpus construction to the use of facial expression and body movement capture for more precise recognition [<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref32">32</xref>]. 
As anticipated, no glove-based system used such an approach.</p></sec></sec></sec><sec id="s3-4"><title>Corpus Used to Generate the Language Database for System Development</title><sec id="s3-4-1"><title>Image-Based</title><p>Overall, most systems relied on corpora composed of isolated words or terms, except for 3 studies, which included full sentences [<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref26">26</xref>,<xref ref-type="bibr" rid="ref28">28</xref>]. Two studies defined their corpus based on real-world contexts [<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref26">26</xref>], while others relied on datasets designed by the authors [<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref30">30</xref>], consulted health professionals [<xref ref-type="bibr" rid="ref14">14</xref>], or extracted content from dictionaries [<xref ref-type="bibr" rid="ref31">31</xref>]. Four systems focused on essential or emergency vocabulary, often including the alphabet, numbers, and isolated signs relevant to health care, obtained through internet searches or expert input [<xref ref-type="bibr" rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref25">25</xref>,<xref ref-type="bibr" rid="ref36">36</xref>].</p></sec><sec id="s3-4-2"><title>Sensor-Based</title><p>One glove-based system developed its corpus through consultations with health professionals and internet searches [<xref ref-type="bibr" rid="ref32">32</xref>], while another was based on real emergency call transcripts [<xref ref-type="bibr" rid="ref34">34</xref>]. A third glove-based system included an extensive set of 600 health-related sentences covering various types (nominal, verbal, and questions) with over 3000 signs [<xref ref-type="bibr" rid="ref40">40</xref>]. 
A fourth one focused on basic signs only (fingerspelling the alphabet, numbers, and isolated sign words) [<xref ref-type="bibr" rid="ref32">32</xref>].</p><p>Among depth-sensing systems, 3 built their corpora using isolated words or terms [<xref ref-type="bibr" rid="ref29">29</xref>,<xref ref-type="bibr" rid="ref33">33</xref>], while others developed their corpora in consultation with health care professionals [<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref35">35</xref>] or constructed its corpus using full sentences commonly used during emergency calls [<xref ref-type="bibr" rid="ref41">41</xref>].</p></sec></sec><sec id="s3-5"><title>Health Care Context</title><sec id="s3-5-1"><title>Image-Based</title><p>Among the 15 studies in this group, 46.7% (7/15) were based on the general hospital environment [<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref27">27</xref>,<xref ref-type="bibr" rid="ref30">30</xref>,<xref ref-type="bibr" rid="ref31">31</xref>,<xref ref-type="bibr" rid="ref37">37</xref>,<xref ref-type="bibr" rid="ref42">42</xref>], 26.7% (4/15) focused on emergency services or situations [<xref ref-type="bibr" rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref25">25</xref>,<xref ref-type="bibr" rid="ref26">26</xref>,<xref ref-type="bibr" rid="ref36">36</xref>], 20% (3/15) were based on primary care consultations [<xref ref-type="bibr" rid="ref28">28</xref>,<xref ref-type="bibr" rid="ref38">38</xref>,<xref ref-type="bibr" rid="ref39">39</xref>], and 6.7% (1/15) on psychiatric interviews [<xref ref-type="bibr" rid="ref14">14</xref>].</p></sec><sec id="s3-5-2"><title>Sensor-Based</title><p>In total, 2 of the glove-based systems used emergency services or situations as a basis [<xref ref-type="bibr" rid="ref32">32</xref>,<xref ref-type="bibr" rid="ref34">34</xref>], and the remaining one used the general hospital environment [<xref ref-type="bibr" 
rid="ref40">40</xref>], while 2 depth-sensing systems were based on care provided in a general hospital environment [<xref ref-type="bibr" rid="ref29">29</xref>,<xref ref-type="bibr" rid="ref35">35</xref>], 1 used the context of primary care consultations [<xref ref-type="bibr" rid="ref12">12</xref>], and 2 used emergency services [<xref ref-type="bibr" rid="ref33">33</xref>,<xref ref-type="bibr" rid="ref41">41</xref>].</p></sec></sec><sec id="s3-6"><title>Approaches and Techniques for System Development</title><sec id="s3-6-1"><title>Image-Based</title><p>In the image-based systems reviewed, different datasets have been created, involving capturing images in many lighting conditions and backgrounds, recording of signs made by users fluent in sign languages, extracting keyframes from videos [<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref25">25</xref>-<xref ref-type="bibr" rid="ref28">28</xref>,<xref ref-type="bibr" rid="ref33">33</xref>-<xref ref-type="bibr" rid="ref38">38</xref>], and image stacking [<xref ref-type="bibr" rid="ref30">30</xref>]. These datasets have been divided into training, validation, and test sets for developing and evaluating the proposed systems [<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref14">14</xref>].</p><p>Data processing included methods such as hierarchical classification, feature extraction through neural networks, human key point estimation, and pose, hand, and skeleton coordinates of people in videos [<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref25">25</xref>,<xref ref-type="bibr" rid="ref27">27</xref>]. 
Different models were used, such as CNNs, long short-term memory, hidden Markov model (HMM), OpenPose, 3D graph convolutional network, long-term recurrent convolutional network, 1D-CNN, and bidirectional long short-term memory, both individually and in combination, for the classification and prediction of sign language signals [<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref26">26</xref>,<xref ref-type="bibr" rid="ref27">27</xref>,<xref ref-type="bibr" rid="ref30">30</xref>,<xref ref-type="bibr" rid="ref37">37</xref>-<xref ref-type="bibr" rid="ref39">39</xref>,<xref ref-type="bibr" rid="ref42">42</xref>].</p><p>Additionally, only 1 paper presents an avatar module that transforms a hearing person&#x2019;s text into sign language [<xref ref-type="bibr" rid="ref39">39</xref>]. The system comprises 3 modules: a signal recognition module, a speech recognition and synthesis module, and an avatar module. Each module performs specific tasks to ensure the integrated functioning of the system.</p></sec><sec id="s3-6-2"><title>Sensor-Based</title><p>In sensor-based systems, development methods involved capturing sign language signs with a Kinect, Leap Motion Controller, and cameras on wearable devices such as smart glasses [<xref ref-type="bibr" rid="ref32">32</xref>-<xref ref-type="bibr" rid="ref35">35</xref>,<xref ref-type="bibr" rid="ref41">41</xref>]. 
Then, the captured data were processed and analyzed to extract relevant characteristics, such as position, movement, and shape of the hand; wrist trajectory; and other specific features of the sign [<xref ref-type="bibr" rid="ref32">32</xref>,<xref ref-type="bibr" rid="ref34">34</xref>].</p><p>Different techniques have been applied in the development of these systems, including machine learning models such as random forest, na&#x00EF;ve Bayes classifier, Ada-Boosting, dynamic time warping, and HMMs [<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref29">29</xref>,<xref ref-type="bibr" rid="ref35">35</xref>]. Furthermore, preprocessing, segmentation, and feature extraction techniques have also been used to improve the effectiveness and accuracy of the systems in detecting and interpreting signs [<xref ref-type="bibr" rid="ref32">32</xref>-<xref ref-type="bibr" rid="ref34">34</xref>].</p><p>One of the systems also integrated machine translation processes by applying morphological and syntactic analysis to restructure sentence structure and ensure grammatical agreement [<xref ref-type="bibr" rid="ref40">40</xref>].</p></sec></sec><sec id="s3-7"><title>Technologies Involved</title><p>The technologies involved in system development are shown in <xref ref-type="supplementary-material" rid="app6">Multimedia Appendix 6</xref>.</p></sec><sec id="s3-8"><title>The Infrastructure and Training Required for Use</title><sec id="s3-8-1"><title>Image-Based</title><p>Desktop and notebook computers, cameras, sensors, and supporting devices and equipment were needed to implement 3 proposed systems [<xref ref-type="bibr" rid="ref25">25</xref>,<xref ref-type="bibr" rid="ref27">27</xref>,<xref ref-type="bibr" rid="ref36">36</xref>,<xref ref-type="bibr" rid="ref42">42</xref>], whereas a mobile phone was required for 4 of them [<xref ref-type="bibr" rid="ref27">27</xref>,<xref ref-type="bibr" rid="ref30">30</xref>,<xref ref-type="bibr" rid="ref31">31</xref>,<xref 
ref-type="bibr" rid="ref42">42</xref>]. Beyond internet connectivity, 1 proposal [<xref ref-type="bibr" rid="ref29">29</xref>] required downloading apps. In the image-based group, 2 systems explicitly mentioned using a phone&#x2019;s camera [<xref ref-type="bibr" rid="ref30">30</xref>,<xref ref-type="bibr" rid="ref31">31</xref>], and 3 mentioned using a webcam or external camera [<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref36">36</xref>,<xref ref-type="bibr" rid="ref37">37</xref>]. None of the studies addressed the details of the training required for the translation system. In total, 3 papers failed to address the structure and training required for use [<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref26">26</xref>].</p></sec><sec id="s3-8-2"><title>Sensor-Based</title><p>Three systems, all depth sensing [<xref ref-type="bibr" rid="ref29">29</xref>,<xref ref-type="bibr" rid="ref33">33</xref>,<xref ref-type="bibr" rid="ref35">35</xref>], addressed the need for a computer connected to a power source. Only 1 depth-sensing system pointed out the need for a mobile phone with an integrated camera [<xref ref-type="bibr" rid="ref41">41</xref>]. One glove-based [<xref ref-type="bibr" rid="ref34">34</xref>] proposal required downloading an app, and this was the only one that described the use of a phone&#x2019;s camera, additionally requiring augmented reality glasses with a Bluetooth interface. Another one, from the depth-sensing type [<xref ref-type="bibr" rid="ref33">33</xref>], mentioned the existence of an external camera device. Microsoft Kinect is a standard device for sensor-based systems that uses depth sensing [<xref ref-type="bibr" rid="ref45">45</xref>]. None of the studies addressed the details of the training required for the translation system. 
One paper did not discuss the necessary structure and training for implementation [<xref ref-type="bibr" rid="ref40">40</xref>].</p></sec></sec><sec id="s3-9"><title>Multidisciplinary Partnerships</title><sec id="s3-9-1"><title>Image-Based</title><p>Various disciplines were represented among the research teams, who reported back to university departments of informatics [<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref30">30</xref>], applied sciences [<xref ref-type="bibr" rid="ref27">27</xref>,<xref ref-type="bibr" rid="ref42">42</xref>], engineering [<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref30">30</xref>,<xref ref-type="bibr" rid="ref37">37</xref>,<xref ref-type="bibr" rid="ref38">38</xref>], technology [<xref ref-type="bibr" rid="ref26">26</xref>,<xref ref-type="bibr" rid="ref30">30</xref>,<xref ref-type="bibr" rid="ref31">31</xref>,<xref ref-type="bibr" rid="ref37">37</xref>], computer science [<xref ref-type="bibr" rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref38">38</xref>,<xref ref-type="bibr" rid="ref39">39</xref>,<xref ref-type="bibr" rid="ref42">42</xref>], and biomedical physics [<xref ref-type="bibr" rid="ref37">37</xref>]. Only 4 studies mentioned the participation of linguistic specialists [<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref26">26</xref>,<xref ref-type="bibr" rid="ref32">32</xref>] during corpus development or database recording to improve sign language representation, and only 1 mentioned having involved physicians [<xref ref-type="bibr" rid="ref14">14</xref>] in selecting the words to create the corpus. 
Along with sign language interpreters, hearing-impaired native sign language speakers [<xref ref-type="bibr" rid="ref11">11</xref>,<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref26">26</xref>] were engaged to record the videos.</p></sec><sec id="s3-9-2"><title>Sensor-Based</title><p>The sensor-based systems encompassed a great variety of backgrounds, including university departments of statistics [<xref ref-type="bibr" rid="ref12">12</xref>], AI [<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref32">32</xref>], informatics [<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref41">41</xref>], applied sciences [<xref ref-type="bibr" rid="ref12">12</xref>], engineering [<xref ref-type="bibr" rid="ref29">29</xref>,<xref ref-type="bibr" rid="ref33">33</xref>,<xref ref-type="bibr" rid="ref35">35</xref>], technology [<xref ref-type="bibr" rid="ref33">33</xref>,<xref ref-type="bibr" rid="ref41">41</xref>], and computer science [<xref ref-type="bibr" rid="ref32">32</xref>,<xref ref-type="bibr" rid="ref34">34</xref>,<xref ref-type="bibr" rid="ref40">40</xref>]. To contribute to the enhancement of sign language representation, some systems involved linguistic specialists [<xref ref-type="bibr" rid="ref35">35</xref>] in the development of a corpus or database, and also deaf native sign language speakers [<xref ref-type="bibr" rid="ref32">32</xref>,<xref ref-type="bibr" rid="ref34">34</xref>] and sign language interpreters worked on recording the videos. 
Physicians [<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref32">32</xref>,<xref ref-type="bibr" rid="ref35">35</xref>] and deaf individuals [<xref ref-type="bibr" rid="ref35">35</xref>] were also involved in selecting the words for the corpus.</p></sec></sec><sec id="s3-10"><title>Sample Size (Sample Size of Individuals to Develop the System, Users, and Content)</title><sec id="s3-10-1"><title>Image-Based</title><p>The sample size (number of individuals) used to develop the systems ranged from 2 [<xref ref-type="bibr" rid="ref31">31</xref>] to 26 [<xref ref-type="bibr" rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref25">25</xref>,<xref ref-type="bibr" rid="ref36">36</xref>] among systems that used the same people for development and testing, and 1 study did not inform the sample size used in the development phase [<xref ref-type="bibr" rid="ref38">38</xref>]. For systems that used different individuals for development and testing, the number used in the development phase ranged from 14 [<xref ref-type="bibr" rid="ref26">26</xref>] to 33 [<xref ref-type="bibr" rid="ref39">39</xref>], and the number of individuals used in the test phase ranged from 2 [<xref ref-type="bibr" rid="ref39">39</xref>] to 6 [<xref ref-type="bibr" rid="ref26">26</xref>]. Four studies [<xref ref-type="bibr" rid="ref27">27</xref>,<xref ref-type="bibr" rid="ref28">28</xref>,<xref ref-type="bibr" rid="ref30">30</xref>,<xref ref-type="bibr" rid="ref42">42</xref>] did not mention whether they used the same individuals for both development and testing. 
The dataset&#x2019;s contents ranged from 21 scripts [<xref ref-type="bibr" rid="ref14">14</xref>] to 145,035 videos (<xref ref-type="table" rid="table2">Table 2</xref>) [<xref ref-type="bibr" rid="ref39">39</xref>].</p></sec><sec id="s3-10-2"><title>Sensor-Based</title><p>The sample size (number of individuals) used to develop the systems ranged from 2 [<xref ref-type="bibr" rid="ref32">32</xref>,<xref ref-type="bibr" rid="ref40">40</xref>] to 6 [<xref ref-type="bibr" rid="ref34">34</xref>] among systems that used the same people for development and testing. For systems that used different people for development and testing, the number of people to develop ranged from 2 [<xref ref-type="bibr" rid="ref40">40</xref>] to 12 [<xref ref-type="bibr" rid="ref12">12</xref>] (2 studies [<xref ref-type="bibr" rid="ref33">33</xref>,<xref ref-type="bibr" rid="ref46">46</xref>] did not inform the sample size in development), and the number of users to test ranged from 2 [<xref ref-type="bibr" rid="ref39">39</xref>] to 10 [<xref ref-type="bibr" rid="ref29">29</xref>].</p><p>The content of the datasets ranged from 10 videos (5 for the training phase and 5 for the testing phase) [<xref ref-type="bibr" rid="ref32">32</xref>] to 1260 samples (840 samples for the training set and 420 samples for the testing set) [<xref ref-type="bibr" rid="ref29">29</xref>]. 
In total, 2 studies did not inform the size of the content of their datasets (<xref ref-type="table" rid="table2">Table 2</xref>) [<xref ref-type="bibr" rid="ref40">40</xref>,<xref ref-type="bibr" rid="ref41">41</xref>].</p></sec></sec><sec id="s3-11"><title>Cost</title><sec id="s3-11-1"><title>Image-Based</title><p>None of the included studies provided quantitative data on the costs associated with the development of their SLR systems, either in terms of technological infrastructure or human resources.</p></sec><sec id="s3-11-2"><title>Sensor-Based</title><p>As with image-based studies, no quantitative analysis has been reported regarding the costs for developing the SLR systems. However, one of the studies mentioned that the development cost of the device &#x201C;SmartCall&#x201D; was much less when compared to those of similar works (which used sensors embedded in smartwatches), suggesting that researchers in low- and middle-income countries could readily prototype their own devices [<xref ref-type="bibr" rid="ref32">32</xref>].</p></sec></sec><sec id="s3-12"><title>Testing</title><sec id="s3-12-1"><title>Image-Based</title><p>The tests have been conducted in simulated environments involving different training networks, varying noise levels, and different scenarios [<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref14">14</xref>]. 
Data splitting strategies into training and testing sets [<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref25">25</xref>,<xref ref-type="bibr" rid="ref36">36</xref>-<xref ref-type="bibr" rid="ref39">39</xref>] and comparing different methods [<xref ref-type="bibr" rid="ref18">18</xref>] have been used to evaluate the accuracy and effectiveness of the developed systems in detecting and interpreting signs.</p><p>Accuracy measures in detecting and interpreting signs ranged from 25% [<xref ref-type="bibr" rid="ref27">27</xref>] to 100% [<xref ref-type="bibr" rid="ref42">42</xref>]. Precision ranged from 91.5% [<xref ref-type="bibr" rid="ref25">25</xref>] to 100% [<xref ref-type="bibr" rid="ref42">42</xref>] (5 systems) [<xref ref-type="bibr" rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref24">24</xref>,<xref ref-type="bibr" rid="ref25">25</xref>,<xref ref-type="bibr" rid="ref37">37</xref>,<xref ref-type="bibr" rid="ref42">42</xref>]. Recall ranged from 90.1% [<xref ref-type="bibr" rid="ref25">25</xref>] to 100% [<xref ref-type="bibr" rid="ref42">42</xref>] (4 systems) [<xref ref-type="bibr" rid="ref24">24</xref>,<xref ref-type="bibr" rid="ref25">25</xref>,<xref ref-type="bibr" rid="ref37">37</xref>,<xref ref-type="bibr" rid="ref42">42</xref>]. <italic>F</italic><sub>1</sub>-score ranged from 90.7% [<xref ref-type="bibr" rid="ref25">25</xref>] to 100% [<xref ref-type="bibr" rid="ref42">42</xref>] (4 systems) [<xref ref-type="bibr" rid="ref24">24</xref>,<xref ref-type="bibr" rid="ref25">25</xref>,<xref ref-type="bibr" rid="ref37">37</xref>,<xref ref-type="bibr" rid="ref42">42</xref>]. 
No study presented specificity and sensitivity (<xref ref-type="supplementary-material" rid="app5">Multimedia Appendix 5</xref>).</p><p>A common difficulty was the failure of some systems to predict similar hand movements, given the proximity of the key points [<xref ref-type="bibr" rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref30">30</xref>,<xref ref-type="bibr" rid="ref31">31</xref>]. For that reason, double-handed signs were more accurately classified than single-handed ones [<xref ref-type="bibr" rid="ref22">22</xref>].</p></sec><sec id="s3-12-2"><title>Sensor-Based</title><p>The tests were conducted using datasets, in which different users performed sign language signs repeatedly in different scenarios [<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref32">32</xref>,<xref ref-type="bibr" rid="ref35">35</xref>]. Testing also involved splitting data into training, validation, and test sets and evaluating the performance of the systems [<xref ref-type="bibr" rid="ref29">29</xref>,<xref ref-type="bibr" rid="ref32">32</xref>,<xref ref-type="bibr" rid="ref33">33</xref>]. 
Additionally, usability and user acceptance tests have been conducted to assess the effectiveness and practicality of the systems in real-life situations [<xref ref-type="bibr" rid="ref34">34</xref>,<xref ref-type="bibr" rid="ref35">35</xref>].</p><p>Accuracy measures in detecting and interpreting signs ranged from 80.5% [<xref ref-type="bibr" rid="ref33">33</xref>] to 99.8% [<xref ref-type="bibr" rid="ref41">41</xref>] (8 systems) [<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref29">29</xref>,<xref ref-type="bibr" rid="ref32">32</xref>-<xref ref-type="bibr" rid="ref35">35</xref>,<xref ref-type="bibr" rid="ref40">40</xref>,<xref ref-type="bibr" rid="ref41">41</xref>], being 80.5% [<xref ref-type="bibr" rid="ref33">33</xref>] to 99.8% [<xref ref-type="bibr" rid="ref41">41</xref>] for systems with depth-sensing sensors and 72% [<xref ref-type="bibr" rid="ref32">32</xref>] to 92% [<xref ref-type="bibr" rid="ref40">40</xref>] for systems with hand glove sensors. Only 1 study [<xref ref-type="bibr" rid="ref12">12</xref>] presented precision (&#x201C;&#x003E;90%&#x201D;). Recall ranged from 90.1% [<xref ref-type="bibr" rid="ref25">25</xref>] to 98.8% [<xref ref-type="bibr" rid="ref24">24</xref>] (3 systems) [<xref ref-type="bibr" rid="ref24">24</xref>,<xref ref-type="bibr" rid="ref25">25</xref>,<xref ref-type="bibr" rid="ref37">37</xref>]. <italic>F</italic><sub>1</sub>-score ranged from 85% [<xref ref-type="bibr" rid="ref32">32</xref>] to 88.6% [<xref ref-type="bibr" rid="ref12">12</xref>] (2 systems) [<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref32">32</xref>]. 
Only 1 study [<xref ref-type="bibr" rid="ref12">12</xref>] presented specificity and sensitivity (99.8% and 87.9%, respectively; <xref ref-type="supplementary-material" rid="app5">Multimedia Appendix 5</xref>).</p></sec></sec><sec id="s3-13"><title>Evaluation or Use in Real Context</title><sec id="s3-13-1"><title>Image-Based</title><p>Among image-based studies, authors of 11 of the studies [<xref ref-type="bibr" rid="ref13">13</xref>-<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref25">25</xref>-<xref ref-type="bibr" rid="ref28">28</xref>,<xref ref-type="bibr" rid="ref31">31</xref>,<xref ref-type="bibr" rid="ref36">36</xref>,<xref ref-type="bibr" rid="ref38">38</xref>,<xref ref-type="bibr" rid="ref39">39</xref>] did not respond to the emails sent by the research team, so it was impossible to update the current status of their systems. Authors of 2 of the studies informed that their systems have not been implemented in real scenarios or commercial settings [<xref ref-type="bibr" rid="ref30">30</xref>,<xref ref-type="bibr" rid="ref42">42</xref>]. One author mentioned that his team is in the process of developing a smartphone app for interpreting medical signs and that they will proceed to real-world implementation once it is ready [<xref ref-type="bibr" rid="ref37">37</xref>].</p></sec><sec id="s3-13-2"><title>Sensor-Based</title><p>Among sensor-based studies, the authors of 3 of the studies [<xref ref-type="bibr" rid="ref29">29</xref>,<xref ref-type="bibr" rid="ref33">33</xref>,<xref ref-type="bibr" rid="ref41">41</xref>] did not respond to the emails sent by the research team, so it was not possible to update the current status of their systems. 
Authors of 4 other studies informed that their systems have never been deployed or commercialized [<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref34">34</xref>,<xref ref-type="bibr" rid="ref35">35</xref>,<xref ref-type="bibr" rid="ref40">40</xref>]. Authors of the other study announced that their system is currently undergoing major design improvements and patent applications and that they hope to make it commercial very soon [<xref ref-type="bibr" rid="ref32">32</xref>]. Authors of one of the studies reported that they open-sourced their dataset and their source code implementation to facilitate adoption of their system and accelerate technology transfer among research groups [<xref ref-type="bibr" rid="ref32">32</xref>].</p></sec></sec><sec id="s3-14"><title>Emergency Responses Capability</title><p>Regarding emergency circumstances, 13 studies [<xref ref-type="bibr" rid="ref12">12</xref>-<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref25">25</xref>,<xref ref-type="bibr" rid="ref26">26</xref>,<xref ref-type="bibr" rid="ref28">28</xref>,<xref ref-type="bibr" rid="ref30">30</xref>,<xref ref-type="bibr" rid="ref32">32</xref>,<xref ref-type="bibr" rid="ref33">33</xref>,<xref ref-type="bibr" rid="ref35">35</xref>,<xref ref-type="bibr" rid="ref36">36</xref>] analyzed the capabilities of the system to respond in urgent situations according to various parameters.</p><sec id="s3-14-1"><title>Image-Based</title><p>Several image-based systems assessed key performance indicators relevant to urgent health care contexts, including recognition speed for signs and phrases [<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref13">13</xref>], the system&#x2019;s ability to reliably alert health care providers [<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref34">34</xref>], and the clarity of 
communication output [<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref26">26</xref>]. Some studies also addressed design considerations critical for emergency use, such as error tolerance (the system&#x2019;s resilience to technical failures) [<xref ref-type="bibr" rid="ref25">25</xref>], and the inclusion of medical vocabulary specific to emergency scenarios [<xref ref-type="bibr" rid="ref30">30</xref>]. One system also incorporated structured protocols aimed at guiding interactions with deaf patients during emergencies [<xref ref-type="bibr" rid="ref15">15</xref>]. However, a common limitation was the small size of the vocabulary recognized, which restricted the ability to handle diverse emergency dialogues [<xref ref-type="bibr" rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref36">36</xref>].</p></sec><sec id="s3-14-2"><title>Sensor-Based</title><p>Sensor-based systems also demonstrated potential for real-time emergency support. For instance, one system was specifically designed to improve access to emergency services through depth-sensing technology [<xref ref-type="bibr" rid="ref41">41</xref>], while another emphasized robustness through error-tolerant functionality [<xref ref-type="bibr" rid="ref12">12</xref>]. Similar to image-based tools, sensor-based systems faced limitations related to the restricted number of medical terms recognized, which could hinder effective communication during critical situations [<xref ref-type="bibr" rid="ref32">32</xref>,<xref ref-type="bibr" rid="ref34">34</xref>].</p></sec></sec><sec id="s3-15"><title>Technological Characteristics</title><sec id="s3-15-1"><title>Image-Based</title><p>None of the studies reported on critical aspects such as system reliability (graceful degradation or recovery after crashes), data security, video storage, or deletion policies after translation, or objective metrics comparing communication outcomes with and without the system. 
Translation time was superficially addressed in 1 paper [<xref ref-type="bibr" rid="ref22">22</xref>]. Authors compared 3 different models (a 3D CNN; a pretrained visual geometry group 16 layers and a recurrent neural network with a long short-term memory scheme; and a model based on YOLO v5, an advanced object detection algorithm). When comparing the proposed techniques, the YOLO-based model was faster, with an image processing rate of 40&#x2010;90 frames per second. Therefore, it could be used for emergency sign recognition with only a few milliseconds of delay.</p></sec><sec id="s3-15-2"><title>Sensor-Based</title><p>Only 3 studies addressed translation time in sensor-based systems [<xref ref-type="bibr" rid="ref29">29</xref>,<xref ref-type="bibr" rid="ref34">34</xref>,<xref ref-type="bibr" rid="ref41">41</xref>]. One study compared different machine learning classifiers and reported the following processing times: 8.09 seconds for the na&#x00EF;ve Bayes classifier, 15.3 seconds for random forest, 17.63 seconds for dynamic time warping, and 20.8 seconds for HMM [<xref ref-type="bibr" rid="ref29">29</xref>]. Another system reported an average latency of 0.55 seconds for translating 550 signs, with mobile translation taking 122 milliseconds and phoneme streaming 206 milliseconds to render the animation [<xref ref-type="bibr" rid="ref34">34</xref>]. The third study reported that the system required 3 to 4 seconds from the end of the gesture to generate the output. 
In test scenarios, it consistently outperformed Google Cloud, with response times of 2.5, 2.6, and 2.2 seconds, compared to 4.1, 4.2, and 3.7 seconds for Google Cloud [<xref ref-type="bibr" rid="ref41">41</xref>].</p><p>As with image-based systems, none of the sensor-based systems discussed reliability, data privacy, postprocessing video storage, or communication effectiveness metrics.</p></sec></sec><sec id="s3-16"><title>User Experience</title><sec id="s3-16-1"><title>Image-Based</title><p>User experience was not explicitly evaluated in any of the image-based studies. A few of them commented on some factors related to the usability of the developed systems, both about their facilities and difficulties. These feedbacks are listed in <xref ref-type="supplementary-material" rid="app5">Multimedia Appendix 5</xref>.</p></sec><sec id="s3-16-2"><title>Sensor-Based</title><p>The evaluation of users&#x2019; experience was subjectively reported in 4 sensor-based studies [<xref ref-type="bibr" rid="ref34">34</xref>,<xref ref-type="bibr" rid="ref35">35</xref>,<xref ref-type="bibr" rid="ref40">40</xref>,<xref ref-type="bibr" rid="ref41">41</xref>]. The first one, classified as glove-based, conducted a user study on the signers&#x2019; experience. The study involved 12 participants, and Quality of Experience was rated on a 5-point scale, ranging from 0 (worst evaluation) to 5 (best evaluation), to assess the usefulness of the system. The obtained results were 4.2 for accessibility, 4.3 for usability, and 4.6 for overall experience [<xref ref-type="bibr" rid="ref34">34</xref>].</p><p>Additionally, a second study, classified as depth-sensing, developed a questionnaire score using the same scale for 5 statements. In total, 5 participants used the system, and afterward, responded to the questionnaire. 
The questionnaire results obtained for each statement were 4.85 for &#x201C;I was able to express my situation using HospiSign,&#x201D; 4.85 for &#x201C;HospiSign is easy to use,&#x201D; 5.00 for &#x201C;Most people could learn to use HospiSign quickly,&#x201D; 4.45 for &#x201C;Options were sufficiently clear,&#x201D; and 5.00 for &#x201C;HospiSign would help the hearing impaired&#x201D; [<xref ref-type="bibr" rid="ref35">35</xref>].</p><p>Another study, from the glove-based category, provided a manual evaluation performed by 3 native Arabic speakers. Each of the 600 sentences was classified by them as understandable, somehow understandable, and not understandable according to grammatical and semantic metrics. The respective results were 80%, 12%, and 8%, demonstrating an acceptable translation provision in 92% of the sample [<xref ref-type="bibr" rid="ref40">40</xref>].</p><p>Additionally, a third study, classified as depth-sensing, conducted a user satisfaction survey with 10 users, and the results were classified as high, medium, and low satisfaction. The results reported were user registration: high, text-to-call: high, call-to-sign: medium, sign-to-call: medium, and prerecorded message: high [<xref ref-type="bibr" rid="ref41">41</xref>].</p><p>Finally, a fourth study, classified as depth-sensing as well, developed a questionnaire score using the same scale for 5 statements. 
In total, 5 participants used the system, and afterward, responded to the questionnaire, and the results obtained for each statement were 4.85 for &#x201C;I was able to express my situation using HospiSign,&#x201D; 4.85 for &#x201C;HospiSign is easy to use,&#x201D; 5.00 for &#x201C;Most people could learn to use HospiSign quickly,&#x201D; 4.45 for &#x201C;Options were sufficiently clear,&#x201D; and 5.00 for &#x201C;HospiSign would help the hearing impaired&#x201D; [<xref ref-type="bibr" rid="ref35">35</xref>].</p><p>There were also some comments on the usability of the developed systems, concerning the user experience. These feedbacks are listed in <xref ref-type="supplementary-material" rid="app5">Multimedia Appendix 5</xref>.</p></sec></sec><sec id="s3-17"><title>Ethical Issues Addressed</title><p>Ethical concerns related to the use of communication systems for deaf and hard-of-hearing individuals were largely underexplored across the included studies. Importantly, none of the included studies discussed broader ethical implications of the systems themselves, such as data privacy and security, the handling and potential storage of video or biometric information, or the autonomy and psychological impact of using these technologies. Despite their role in sensitive health care contexts, no system reported design strategies for ensuring user dignity, minimizing potential emotional distress, or addressing users&#x2019; preferences regarding data management. Additionally, none of the studies evaluated whether addressing ethical aspects influenced system usability or user trust, including transparency, control over personal data, and informed decision-making.</p></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Principal Findings</title><p>The development of SLR systems represents a promising step toward improving communication between deaf individuals and health care providers. 
Despite the vast literature in SLR systems, many computer science studies focus on creating algorithms for recognizing (and less often translating) signed content [<xref ref-type="bibr" rid="ref1">1</xref>]. These teams often lack deaf individuals with firsthand experience of the challenges the technology aims to address, and they may not fully understand the linguistic complexities of the language. Additionally, the algorithms are typically trained on datasets that do not reflect real-world scenarios, making these 1D approaches to sign language processing of limited practical value [<xref ref-type="bibr" rid="ref1">1</xref>]. In our analysis, the comprehensive search resulted in over 21,000 papers, but only 23 studies met our inclusion criteria. All analyzed systems are in the development and testing stage, with no real application yet, and this review highlights significant challenges that hinder the implementation of these systems in real-world settings. These challenges include variability in methodologies, limitations in dataset quality, and the underrepresentation of key communication elements, such as bidirectionality and facial expressions. Furthermore, the absence of cost analysis and the lack of multilingual integration reflect gaps in the current body of research. To address these issues, it is essential to evaluate both the technical aspects and the practical implications of SLR systems, aiming for inclusive and effective solutions.</p><p>Image-based systems accounted for the majority of the studies. This predominance highlights the versatility of image-based systems, which capture facial expressions, head movements, lip reading, and hand gestures using simple devices like mobile phone cameras [<xref ref-type="bibr" rid="ref47">47</xref>]. 
In contrast, it is possible to infer that sensor-based systems, which rely on advanced technologies such as depth-sensing cameras and sensor-embedded gloves, incur higher costs [<xref ref-type="bibr" rid="ref48">48</xref>]. However, a critical gap identified across studies was the lack of clear cost-related information. Only 1 study briefly mentioned that its device, &#x201C;SmartCall,&#x201D; was more affordable compared to similar solutions, suggesting its feasibility for low- and middle-income countries [<xref ref-type="bibr" rid="ref32">32</xref>].</p><p>Given the lack of sufficient cost-related data in the studies reviewed, it would be challenging to provide accurate inferences regarding the cost-effectiveness of these technologies. Furthermore, as costs vary significantly depending on the country and context, it would be difficult to generalize these findings across all regions. This is a notable limitation, and we believe that future studies should address this gap by providing more detailed and context-specific cost analyses.</p></sec><sec id="s4-2"><title>Corpus Definition and Number of Samples</title><p>Corpus definition and dataset size were highly heterogeneous across the reviewed studies, reflecting diverse development contexts and objectives. Most systems relied on isolated words, limiting applicability in real-world health care communication, while some incorporated phrases or real clinical contexts, occasionally guided by health professionals or dictionaries [<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref26">26</xref>,<xref ref-type="bibr" rid="ref28">28</xref>,<xref ref-type="bibr" rid="ref40">40</xref>]. 
A few image-based systems also integrated facial expressions and body movements, enhancing linguistic representation [<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref26">26</xref>-<xref ref-type="bibr" rid="ref28">28</xref>,<xref ref-type="bibr" rid="ref30">30</xref>,<xref ref-type="bibr" rid="ref31">31</xref>,<xref ref-type="bibr" rid="ref39">39</xref>]. Among sensor-based systems, corpus construction lacked multimodal features, particularly in glove-based systems, which are limited in capturing facial and body expressions.</p><p>Sample sizes ranged widely from as few as 10 to over 145,000 videos, raising concerns about the adequacy of data for training reliable models. This lack of standardization in corpus design and dataset size limits the comparability, scalability, and generalizability of SLR systems in clinical settings. Establishing minimum standards for corpus development could improve system robustness and support broader implementation in health care.</p></sec><sec id="s4-3"><title>Key Aspects of Effective Communication</title><p>Bidirectionality and facial expression recognition are 2 critical components for effective communication, but were not included in the majority of publications. Most systems were unidirectional, translating only from sign language to written text, impairing its applicability in real-world contexts. 
While such systems may enable health care practitioners to understand what a deaf patient is communicating, they fail to facilitate communication in the opposite direction, thereby undermining true interactive dialogue.</p><p>Facial expression recognition is a vital element for capturing linguistic prosody, and although face landmarks were considered in 8 papers [<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref26">26</xref>-<xref ref-type="bibr" rid="ref28">28</xref>,<xref ref-type="bibr" rid="ref30">30</xref>,<xref ref-type="bibr" rid="ref31">31</xref>,<xref ref-type="bibr" rid="ref39">39</xref>,<xref ref-type="bibr" rid="ref40">40</xref>], only 1 of them explicitly addressed it as an indicator of grammatical tense [<xref ref-type="bibr" rid="ref28">28</xref>]. This omission hinders the natural and nuanced communication required in health care contexts.</p><p>Notably, only 3 studies explicitly acknowledged the cultural and structural distinctions between sign languages and oral languages [<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref28">28</xref>,<xref ref-type="bibr" rid="ref40">40</xref>]. This omission is significant, as sign languages are not mere gestural representations of spoken language, but complete linguistic systems with their own grammar, syntax, and cultural context. Failure to consider these differences can lead to inaccurate or overly literal translations, potentially compromising communication quality and user trust in assistive systems. The limited attention to these linguistic specificities suggests that many of the reviewed systems may have been developed from a technical standpoint, with insufficient involvement of deaf communities or sign language experts.</p><p>Given the numerous challenges involved, it is nearly impossible to develop SLR tools that achieve 100% accuracy with a large vocabulary [<xref ref-type="bibr" rid="ref49">49</xref>]. 
For example, the same sign can have large changes in shape when it is in different locations in the sentence [<xref ref-type="bibr" rid="ref50">50</xref>]. In the studies analyzed, only 1 demonstrated perfect accuracy (100%), but this was based on an image-based system, which was tested only with isolated words [<xref ref-type="bibr" rid="ref12">12</xref>]. Studies that involved a relevant number of sentences and words tended to perform worse, with accuracy sometimes too low to enable effective communication [<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref26">26</xref>].</p></sec><sec id="s4-4"><title>Multilingual Challenges</title><p>Sign languages are not universal or mutually intelligible; they can present significant differences between countries, and sometimes even within the same country [<xref ref-type="bibr" rid="ref51">51</xref>]. While some studies proposed extending their systems to multiple languages [<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref28">28</xref>,<xref ref-type="bibr" rid="ref41">41</xref>], none successfully implemented multilingual translation. The inherent diversity of sign languages&#x2014;each with unique grammar, vocabulary, and body configurations&#x2014;poses substantial challenges to creating universal systems [<xref ref-type="bibr" rid="ref27">27</xref>,<xref ref-type="bibr" rid="ref52">52</xref>,<xref ref-type="bibr" rid="ref53">53</xref>]. Furthermore, there was a predominance of English corpora [<xref ref-type="bibr" rid="ref25">25</xref>,<xref ref-type="bibr" rid="ref30">30</xref>-<xref ref-type="bibr" rid="ref32">32</xref>,<xref ref-type="bibr" rid="ref34">34</xref>,<xref ref-type="bibr" rid="ref36">36</xref>,<xref ref-type="bibr" rid="ref37">37</xref>]. 
This is probably driven by high research investments, as well as the global reach and accessibility of English [<xref ref-type="bibr" rid="ref54">54</xref>], which not only facilitates better comprehension within the medical academic community but also enables broader dissemination and open sharing of the corpus [<xref ref-type="bibr" rid="ref55">55</xref>]. This indicates a gap in accessibility for non-English&#x2013;speaking regions, emphasizing the importance of expanding datasets to include diverse languages and cultures. To address this gap, it is crucial to foster collaboration between researchers from different countries, sharing sign language corpora and visual data, so that a truly multilingual system can be developed.</p></sec><sec id="s4-5"><title>Multidisciplinary Partnerships</title><p>As Bragg et al [<xref ref-type="bibr" rid="ref1">1</xref>] described, an interdisciplinary approach is crucial to sign language processing. Representatives of the deaf community and health care practitioners must be involved in the team to better understand the needs of the community and the specific purposes for which the technology is intended. Linguistics plays a key role in identifying the structures of sign languages that the algorithms need to process. Natural language processing and machine translation offer valuable techniques for modeling, analyzing, and translating. Computer vision is necessary for recognizing signed content, while computer graphics are essential for generating it. Finally, human-computer interaction and design are vital for developing comprehensive systems that meet the needs of the community and integrate seamlessly into people&#x2019;s daily lives [<xref ref-type="bibr" rid="ref1">1</xref>].</p><p>The inclusion of multidisciplinary teams was a notable feature in several studies, combining expertise from linguistics, inclusive design, AI, and health care practitioners. 
However, the participation of deaf community representatives was limited, potentially undermining cultural and linguistic relevance.</p></sec><sec id="s4-6"><title>Ethical and Data Security Concerns</title><p>As these systems are intended to support real-time communication in clinical settings, their design must go beyond technical optimization to consider ethical issues such as privacy, autonomy, and emotional safety, dimensions largely overlooked in the reviewed studies. The reviewed literature does not adequately address the psychological impact of using these systems. Issues such as the psychological effects on users, especially those who are deaf or hard of hearing, were not discussed in any of the studies. These concerns are particularly important, as assistive technology should promote autonomy and well-being without causing additional stress or emotional burdens for users [<xref ref-type="bibr" rid="ref56">56</xref>].</p><p>Data security is another critical concern that received little attention. Many SLR systems collect sensitive information, such as facial images, gestures, and body movements, which may contain identifiable or private data. However, few studies addressed how these data are stored, protected, or deleted. In health care contexts&#x2014;where confidentiality is paramount&#x2014;this lack of attention to privacy and data protection represents a significant oversight.</p><p>These omissions highlight a critical gap: as assistive communication technologies are implemented in real-world health care settings, particularly among structurally marginalized populations like the deaf community, ethical design and responsible implementation are essential. 
Future studies should incorporate ethical considerations from the early stages of system development and report these aspects transparently.</p></sec><sec id="s4-7"><title>User Experience</title><p>Overall, few studies provided reports on the users&#x2019; experience with the developed systems, which were structurally evaluated and described in only 2 of them [<xref ref-type="bibr" rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref36">36</xref>], and only with a small number of participants (12 in Areeb et al [<xref ref-type="bibr" rid="ref22">22</xref>] and 5 in Adithya and Rajesh [<xref ref-type="bibr" rid="ref36">36</xref>]). The other studies focused on usability metrics of the developed systems [<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref25">25</xref>,<xref ref-type="bibr" rid="ref29">29</xref>]. The issues raised corroborate difficulties described in the literature. All recognition systems face the challenge of hand tracking and struggle with self-occlusion between fingers, with additional difficulties in single-hand signals [<xref ref-type="bibr" rid="ref57">57</xref>]. Such an aspect was mentioned especially by the authors of image-based systems, since in their approaches they did not attempt 3D perception to mitigate occlusion [<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref15">15</xref>]. Furthermore, another significant obstacle is the interpersonal variation between signers, which includes not only their physical characteristics but also the way they perform gestures. 
Minor differences in signaling are important constraints for AI and recognition tools, justifying the reports regardless of the system&#x2019;s type [<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref26">26</xref>].</p><p>Although the studies that presented user evaluation [<xref ref-type="bibr" rid="ref34">34</xref>,<xref ref-type="bibr" rid="ref35">35</xref>] reported that the systems performed well, showing positive impacts on accessibility and effective communication, the evidence is limited, as only 2 studies were conducted, and both involved a small number of users. User experience goes beyond the system&#x2019;s accuracy and usability, and according to International Organization for Standardization [<xref ref-type="bibr" rid="ref58">58</xref>], it refers to a &#x201C;person&#x2019;s perceptions and responses resulting from the use and/or anticipated use of a product, system or service,&#x201D; including &#x201C;all the users&#x2019; emotions, beliefs, preferences, perceptions, physical and psychological responses, behaviours and accomplishments that occur before, during and after use.&#x201D; Thus, generating a positive user experience for the deaf users is essential for the adoption and use of the proposed systems. While studies typically focus on accuracy and usability metrics, it is equally important to evaluate other factors, such as learning cost and response speed, which can significantly affect user adoption, especially in health care contexts, where ease of use and time efficiency are critical.</p><p>Saeed et al [<xref ref-type="bibr" rid="ref59">59</xref>], in a review on system-based sensory gloves for SLR, reported that user comfort was a challenge, as using a sensory glove required the user to wear a bulky glove containing sensors, cables, and a circuit board, which limited the user&#x2019;s hand mobility. 
However, we believe this issue is gradually being reduced, as sensors continue to evolve, becoming smaller and more efficient. Furthermore, the ability to connect these sensors via Bluetooth has further minimized the bulkiness, offering more comfort and flexibility for the user.</p><p>The design of interfaces for automatic sign language translation systems, specifically their integration within web-based and social media applications, necessitates careful consideration of numerous factors. Research conducted by Debevc et al [<xref ref-type="bibr" rid="ref46">46</xref>] and Ko&#x017E;uh et al [<xref ref-type="bibr" rid="ref60">60</xref>] addresses methodologies for enhancing the interaction experience for deaf and hard-of-hearing users within digital communication platforms [<xref ref-type="bibr" rid="ref46">46</xref>,<xref ref-type="bibr" rid="ref60">60</xref>]. The authors examine methods to improve accessibility [<xref ref-type="bibr" rid="ref46">46</xref>,<xref ref-type="bibr" rid="ref60">60</xref>].</p><p>A recurring concern highlighted in these studies is the potential for picture-in-picture windows to disrupt the user experience for nondeaf individuals. Therefore, the implementation of picture-in-picture displays should be judiciously considered and minimized. The domain of automatic sign language translation is characterized by rapid advancements and multifaceted complexities. The techniques used to optimize the user experience for these systems, while of significant relevance, warrant a dedicated review to comprehensively assess their efficacy and implications.</p></sec><sec id="s4-8"><title>Limitations and Strengths of the Review</title><p>This review is primarily qualitative, as the diversity in methodologies, technologies, and parameter calculations precluded quantitative synthesis. Additionally, there are no standardized tools to assess the risk of bias in the types of studies included. 
Furthermore, this review focused on systems that recognize sign language. Although these systems do not fully represent the portion of the deaf community that prefers lip reading, writing, or other methods of communication, they do include individuals who use sign language. Finally, 4 publications could not be included, as they were not retrieved despite at least 3 contact attempts regarding 2 studies and due to the unavailability of authors&#x2019; contact details of the other 2 studies.</p><p>The strengths of this systematic review include prospective registration and publication of a protocol [<xref ref-type="bibr" rid="ref18">18</xref>,<xref ref-type="bibr" rid="ref19">19</xref>]. Furthermore, it adheres to the Cochrane Guidelines and the PRISMA statement [<xref ref-type="bibr" rid="ref20">20</xref>,<xref ref-type="bibr" rid="ref21">21</xref>], and it has a broad search strategy encompassing multiple languages and publication types. A multidisciplinary team, including a deaf linguist, ensured diverse perspectives and representation.</p><p>Additionally, this study is innovative, as it focuses specifically on SLR for the health context, an area in which social inclusion and understanding differences are extremely important. Although other reviews were previously published, the majority of them cannot be considered true systematic reviews [<xref ref-type="bibr" rid="ref49">49</xref>,<xref ref-type="bibr" rid="ref59">59</xref>,<xref ref-type="bibr" rid="ref61">61</xref>,<xref ref-type="bibr" rid="ref62">62</xref>]. They lack a prepublication of a protocol, a robust methodology, a broad search strategy, paired screening and data extraction, and comprehensiveness. They usually focus on technical aspects of a broader and more general analysis of sign language translation systems and use restricted publication periods and language (English only) in the search. 
Overall, they did not include a search in MEDLINE, which is a crucial database for health-related research, and presented methodological shortcomings, such as not mentioning the use of paired reviewers for study selection and data extraction. Furthermore, they have not reported extracting data on user experience, cost, and data security. Unlike these more general reviews, our study is unique in focusing on the health care context. Therefore, our study fills these gaps, providing a more robust, detailed, and comprehensive contribution specifically to the health field.</p></sec><sec id="s4-9"><title>Final Considerations</title><p>Image-based systems dominated the field, demonstrating greater accessibility but facing challenges with lighting conditions and environmental variability. While sensor-based systems showed higher precision, their reliance on specialized equipment limits scalability. Critical aspects, such as bidirectional communication and facial expression recognition, remain underexplored, hindering the practical implementation of these systems in health care settings.</p><p>Based on the evidence reviewed, effective SLR systems for real-world health care scenarios are scarce. Future research should focus on developing adaptive systems capable of recognizing diverse sign languages and addressing the varying communication needs of the deaf community. It should prioritize inclusive and participatory design processes that respect the linguistic integrity of sign languages and address sociocultural nuances critical to effective communication. Expanding datasets, incorporating user feedback, and fostering multidisciplinary collaborations will be pivotal for creating inclusive and scalable solutions.</p><p>Additionally, future research should incorporate a comprehensive assessment of the proposed systems, addressing both technical and user-centered aspects. 
This assessment should consider device compatibility (especially for smartphone-based systems), required learning time, translation accuracy across different signers and accents [<xref ref-type="bibr" rid="ref63">63</xref>], user trust in automated translation, its effect on self-expression, and privacy implications. These evaluations should not only be conducted once the system is fully implemented but also throughout the development process (ie, formative evaluation) [<xref ref-type="bibr" rid="ref64">64</xref>].</p><p>Another significant gap identified in the literature is the lack of a standardized evaluation framework for SLR systems. The inclusion of consistent and well-defined indicators, such as user satisfaction and economic viability, would provide a more comprehensive understanding of the systems&#x2019; effectiveness and their potential for widespread adoption. User satisfaction is a crucial success factor, and including economic indicators would help assess whether the systems can be feasibly adopted in different settings, especially in low- and middle-income countries. The implementation of the framework would not only enhance the comparability of findings but also provide valuable insights for policymakers and stakeholders in the health care industry.</p><p>Furthermore, we suggest that the development of future SLR solutions involves experts in ethics, law, and data security to ensure compliance with legal and ethical standards and to protect users during the implementation of these technologies. Studies should provide more detailed information about how ethical and data security issues are managed and the potential impacts on user trust and adoption.</p><p>One notable strength identified in this review was the initiative of a research group to open-source both their dataset and system code [<xref ref-type="bibr" rid="ref32">32</xref>]. 
This practice promotes transparency, reproducibility, and collaboration&#x2014;key elements for the advancement of SLR systems. By allowing other researchers to access and build upon existing work, open data and open-source tools can accelerate the refinement of models and the development of more accurate and user-friendly solutions. In the long term, such collaborative practices may contribute to the creation of robust, scalable systems that are truly usable in health care settings, bridging communication gaps and improving care for deaf individuals. Encouraging the adoption of open science principles within the SLR research community could significantly enhance innovation and standardization efforts in this emerging field.</p></sec><sec id="s4-10"><title>Conclusions</title><p>This review highlights the ongoing development of communication systems designed to assist deaf individuals who use sign language to improve their interaction with health care providers. There was a predominance of image-based approaches over sensor-based ones, though both have demonstrated substantial variability in accuracy in recognizing and interpreting signs. However, critical aspects such as bidirectional communication and the recognition of facial expressions, which are essential for effective communication, were notably absent in most studies. None of the systems was reported to have addressed all aspects critical to integration into health care settings. 
These findings underscore the need for further research, especially regarding the practical implementation of these systems, their usability, and their overall effect on the quality of care for deaf patients.</p></sec></sec></body><back><ack><p>This manuscript was prepared by the authors without the assistance of artificial intelligence tools.</p></ack><notes><sec><title>Funding</title><p>This study was funded in part by the Institute for Health Assessment and Translation for Highly Relevant Chronic and Neglected Diseases (IATS CARE)/National Institute of Science and Technology for Health Technology Assessment (Instituto de Avalia&#x00E7;&#x00E3;o de Tecnologias em Sa&#x00FA;de) or National Council for Scientific and Technological Development (Conselho Nacional de Desenvolvimento Cient&#x00ED;fico e Tecnol&#x00F3;gico; grant 465518/2014-1), Financiadora de Estudos e Projetos (grant 01.21.0097.00), and SignumWeb Comunica&#x00E7;&#x00E3;o Inclusiva Ltda, as part of the project &#x201C;Captar-Libras: Sistema de Comunica&#x00E7;&#x00E3;o por v&#x00ED;deos para surdos aplicado ao pr&#x00E9;-atendimento m&#x00E9;dico.&#x201D; MSM, MFMC, and LRV were supported in part by Conselho Nacional de Desenvolvimento Cient&#x00ED;fico e Tecnol&#x00F3;gico (grants 311742/2025-4, 316841/2021-8, and 134878/2023-0, respectively). 
The funding sources had no role in the design of this study and will not have any role during its execution, analyses, interpretation of the data, or decision.</p></sec><sec><title>Data Availability</title><p>All data generated or analyzed during this study are included in this published paper and its <xref ref-type="supplementary-material" rid="app1">Multimedia Appendices 1</xref>,<xref ref-type="supplementary-material" rid="app6">2</xref>,<xref ref-type="supplementary-material" rid="app3">3</xref>,<xref ref-type="supplementary-material" rid="app2">4</xref>,<xref ref-type="supplementary-material" rid="app4">5</xref>,<xref ref-type="supplementary-material" rid="app5">6</xref>.</p></sec></notes><fn-group><fn fn-type="con"><p>MSM, LFRdO, LRV, ROP, and ZSNR developed a draft of the literature search, which LFRdO and LRV executed. MSM, ROP, ZSNR, and MFMC guided the construction of the protocol, and MSM is a guarantor of the review. LFRdO, LRV, LMMSR, GTdSS, NSS, and MRdC contributed to the work&#x2019;s extraction, analysis, or interpretation of data. MSM, LFRdO, LRV, LMMSR, MRdC, and GTdSS drafted the manuscript; all the other authors performed critical revisions. ELAB and RAAC are professors of linguistics and experts in sign language. They reviewed the entire manuscript to ensure the language was consistent with the literature on disability and deafness. 
All authors read and approved the final manuscript.</p></fn><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">AI</term><def><p>artificial intelligence</p></def></def-item><def-item><term id="abb2">CNN</term><def><p>convolutional neural network</p></def></def-item><def-item><term id="abb3">HMM</term><def><p>hidden Markov model</p></def></def-item><def-item><term id="abb4">PRISMA</term><def><p>Preferred Reporting Items for Systematic Reviews and Meta-Analyses</p></def></def-item><def-item><term id="abb5">SLR</term><def><p>sign language recognition</p></def></def-item><def-item><term id="abb6">YOLO</term><def><p>You Only Look Once</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Bragg</surname><given-names>D</given-names> </name><name name-style="western"><surname>Koller</surname><given-names>O</given-names> </name><name name-style="western"><surname>Bellard</surname><given-names>M</given-names> </name><etal/></person-group><article-title>Sign language recognition, generation, and translation: an interdisciplinary perspective</article-title><conf-name>Proceedings of the 21st International ACM SIGACCESS Conference on Computers and Accessibility</conf-name><conf-date>Oct 28-30, 2019</conf-date><pub-id pub-id-type="doi">10.1145/3308561.3353774</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Kendon</surname><given-names>A</given-names> </name></person-group><source>Gesture Visible Action as Utterance</source><year>2004</year><publisher-name>Cambridge University Press</publisher-name><pub-id pub-id-type="other">0521542936</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation 
citation-type="web"><article-title>Deafness and hearing loss</article-title><source>World Health Organization</source><year>2023</year><access-date>2024-11-14</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.who.int/news-room/fact-sheets/detail/deafness-and-hearing-loss">https://www.who.int/news-room/fact-sheets/detail/deafness-and-hearing-loss</ext-link></comment></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="web"><source>World Federation of the Deaf</source><access-date>2025-12-05</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://wfdeaf.org">https://wfdeaf.org</ext-link></comment></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ubido</surname><given-names>J</given-names> </name><name name-style="western"><surname>Huntington</surname><given-names>J</given-names> </name><name name-style="western"><surname>Warburton</surname><given-names>D</given-names> </name></person-group><article-title>Inequalities in access to healthcare faced by women who are deaf</article-title><source>Health Soc Care Community</source><year>2002</year><month>07</month><volume>10</volume><issue>4</issue><fpage>247</fpage><lpage>253</lpage><pub-id pub-id-type="doi">10.1046/j.1365-2524.2002.00365.x</pub-id><pub-id pub-id-type="medline">12193168</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kuenburg</surname><given-names>A</given-names> </name><name name-style="western"><surname>Fellinger</surname><given-names>P</given-names> </name><name name-style="western"><surname>Fellinger</surname><given-names>J</given-names> </name></person-group><article-title>Health care access among deaf people</article-title><source>J Deaf Stud Deaf 
Educ</source><year>2016</year><month>01</month><volume>21</volume><issue>1</issue><fpage>1</fpage><lpage>10</lpage><pub-id pub-id-type="doi">10.1093/deafed/env042</pub-id><pub-id pub-id-type="medline">26405210</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Mansutti</surname><given-names>I</given-names> </name><name name-style="western"><surname>Achil</surname><given-names>I</given-names> </name><name name-style="western"><surname>Rosa Gastaldo</surname><given-names>C</given-names> </name><name name-style="western"><surname>Tom&#x00E9; Pires</surname><given-names>C</given-names> </name><name name-style="western"><surname>Palese</surname><given-names>A</given-names> </name></person-group><article-title>Individuals with hearing impairment/deafness during the COVID-19 pandemic: a rapid review on communication challenges and strategies</article-title><source>J Clin Nurs</source><year>2023</year><month>08</month><volume>32</volume><issue>15-16</issue><fpage>4454</fpage><lpage>4472</lpage><pub-id pub-id-type="doi">10.1111/jocn.16572</pub-id><pub-id pub-id-type="medline">36320127</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Rannefeld</surname><given-names>J</given-names> </name><name name-style="western"><surname>O&#x2019;Sullivan</surname><given-names>JL</given-names> </name><name name-style="western"><surname>Kuhlmey</surname><given-names>A</given-names> </name><name name-style="western"><surname>Zoellick</surname><given-names>JC</given-names> </name></person-group><article-title>Deaf and hard-of-hearing patients are unsatisfied with and avoid German health care: results from an online survey in German Sign Language</article-title><source>BMC Public 
Health</source><year>2023</year><month>10</month><day>18</day><volume>23</volume><issue>1</issue><fpage>2026</fpage><pub-id pub-id-type="doi">10.1186/s12889-023-16924-w</pub-id><pub-id pub-id-type="medline">37848898</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Rastgoo</surname><given-names>R</given-names> </name><name name-style="western"><surname>Kiani</surname><given-names>K</given-names> </name><name name-style="western"><surname>Escalera</surname><given-names>S</given-names> </name></person-group><article-title>Sign language recognition: a deep survey</article-title><source>Expert Syst Appl</source><year>2021</year><month>02</month><volume>164</volume><fpage>113794</fpage><pub-id pub-id-type="doi">10.1016/j.eswa.2020.113794</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Papastratis</surname><given-names>I</given-names> </name><name name-style="western"><surname>Chatzikonstantinou</surname><given-names>C</given-names> </name><name name-style="western"><surname>Konstantinidis</surname><given-names>D</given-names> </name><name name-style="western"><surname>Dimitropoulos</surname><given-names>K</given-names> </name><name name-style="western"><surname>Daras</surname><given-names>P</given-names> </name></person-group><article-title>Artificial intelligence technologies for sign language</article-title><source>Sensors (Basel)</source><year>2021</year><month>08</month><day>30</day><volume>21</volume><issue>17</issue><fpage>5843</fpage><pub-id pub-id-type="doi">10.3390/s21175843</pub-id><pub-id pub-id-type="medline">34502733</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Parton</surname><given-names>BS</given-names> </name></person-group><article-title>Sign language recognition and translation: a multidisciplined approach from the field of artificial intelligence</article-title><source>J Deaf Stud Deaf Educ</source><year>2006</year><volume>11</volume><issue>1</issue><fpage>94</fpage><lpage>101</lpage><pub-id pub-id-type="doi">10.1093/deafed/enj003</pub-id><pub-id pub-id-type="medline">16192405</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sosa-Jim&#x00E9;nez</surname><given-names>CO</given-names> </name><name name-style="western"><surname>R&#x00ED;os-Figueroa</surname><given-names>HV</given-names> </name><name name-style="western"><surname>Sol&#x00ED;s-Gonz&#x00E1;lez-Cos&#x00ED;o</surname><given-names>AL</given-names> </name></person-group><article-title>A prototype for Mexican Sign Language recognition and synthesis in support of a primary care physician</article-title><source>IEEE Access</source><year>2022</year><volume>10</volume><fpage>127620</fpage><lpage>127635</lpage><pub-id pub-id-type="doi">10.1109/ACCESS.2022.3226696</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Xia</surname><given-names>K</given-names> </name><name name-style="western"><surname>Lu</surname><given-names>W</given-names> </name><name name-style="western"><surname>Fan</surname><given-names>H</given-names> </name><name name-style="western"><surname>Zhao</surname><given-names>Q</given-names> </name></person-group><article-title>A sign language recognition system applied to deaf-mute medical consultation</article-title><source>Sensors (Basel)</source><year>2022</year><month>11</month><day>24</day><volume>22</volume><issue>23</issue><fpage>9107</fpage><pub-id 
pub-id-type="doi">10.3390/s22239107</pub-id><pub-id pub-id-type="medline">36501809</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Pikoulis</surname><given-names>EV</given-names> </name><name name-style="western"><surname>Bifis</surname><given-names>A</given-names> </name><name name-style="western"><surname>Trigka</surname><given-names>M</given-names> </name><name name-style="western"><surname>Constantinopoulos</surname><given-names>C</given-names> </name><name name-style="western"><surname>Kosmopoulos</surname><given-names>D</given-names> </name></person-group><article-title>Context-aware automatic sign language video transcription in psychiatric interviews</article-title><source>Sensors (Basel)</source><year>2022</year><month>03</month><day>30</day><volume>22</volume><issue>7</issue><fpage>2656</fpage><pub-id pub-id-type="doi">10.3390/s22072656</pub-id><pub-id pub-id-type="medline">35408270</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>da Silva</surname><given-names>DRB</given-names> </name><name name-style="western"><surname>de Ara&#x00FA;jo</surname><given-names>TMU</given-names> </name><name name-style="western"><surname>do R&#x00EA;go</surname><given-names>TG</given-names> </name><name name-style="western"><surname>Brand&#x00E3;o</surname><given-names>MAC</given-names> </name><name name-style="western"><surname>Gon&#x00E7;alves</surname><given-names>LMG</given-names> </name></person-group><article-title>A multiple stream architecture for the recognition of signs in Brazilian sign language in the context of health</article-title><source>Multimed Tools Appl</source><year>2024</year><volume>83</volume><issue>7</issue><fpage>19767</fpage><lpage>19785</lpage><pub-id 
pub-id-type="doi">10.1007/s11042-023-16332-7</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>S Kumar</surname><given-names>S</given-names> </name><name name-style="western"><surname>Wangyal</surname><given-names>T</given-names> </name><name name-style="western"><surname>Saboo</surname><given-names>V</given-names> </name><name name-style="western"><surname>Srinath</surname><given-names>R</given-names> </name></person-group><article-title>Time series neural networks for real time sign language translation</article-title><conf-name>2018 17th IEEE International Conference on Machine Learning and Applications (ICMLA)</conf-name><conf-date>Dec 17-20, 2018</conf-date><pub-id pub-id-type="doi">10.1109/ICMLA.2018.00043</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Alaghband</surname><given-names>M</given-names> </name><name name-style="western"><surname>Maghroor</surname><given-names>HR</given-names> </name><name name-style="western"><surname>Garibay</surname><given-names>I</given-names> </name></person-group><article-title>A survey on sign language literature</article-title><source>Mach Learn Appl</source><year>2023</year><month>12</month><volume>14</volume><fpage>100504</fpage><pub-id pub-id-type="doi">10.1016/j.mlwa.2023.100504</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="web"><article-title>Sign language recognition system for deaf patients: a systematic review</article-title><source>Open Science Framework</source><year>2023</year><access-date>2026-01-28</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://osf.io/fpemr/overview">https://osf.io/fpemr/overview</ext-link></comment></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation 
citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Marcolino</surname><given-names>MS</given-names> </name><name name-style="western"><surname>Oliveira</surname><given-names>LFR</given-names> </name><name name-style="western"><surname>Valle</surname><given-names>LR</given-names> </name><etal/></person-group><article-title>Sign language recognition system for deaf patients: protocol for a systematic review</article-title><source>JMIR Res Protoc</source><year>2025</year><month>01</month><day>23</day><volume>14</volume><issue>1</issue><fpage>e55427</fpage><pub-id pub-id-type="doi">10.2196/55427</pub-id><pub-id pub-id-type="medline">39847417</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Higgins</surname><given-names>JPT</given-names> </name></person-group><source>Cochrane Handbook for Systematic Reviews of Interventions Version 6.5 (Updated August 2024)</source><year>2024</year><publisher-name>Cochrane</publisher-name></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Page</surname><given-names>MJ</given-names> </name><name name-style="western"><surname>McKenzie</surname><given-names>JE</given-names> </name><name name-style="western"><surname>Bossuyt</surname><given-names>PM</given-names> </name><etal/></person-group><article-title>The PRISMA 2020 statement: an updated guideline for reporting systematic reviews</article-title><source>BMJ</source><year>2021</year><month>03</month><day>29</day><volume>372</volume><fpage>n71</fpage><pub-id pub-id-type="doi">10.1136/bmj.n71</pub-id><pub-id pub-id-type="medline">33782057</pub-id></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Areeb</surname><given-names>QM</given-names> </name><name name-style="western"><surname>Nadeem</surname><given-names>M</given-names> </name><name name-style="western"><surname>Alroobaea</surname><given-names>R</given-names> </name><name name-style="western"><surname>Anwer</surname><given-names>F</given-names> </name></person-group><article-title>Helping hearing-impaired in emergency situations: a deep learning-based approach</article-title><source>IEEE Access</source><year>2022</year><volume>10</volume><fpage>8502</fpage><lpage>8517</lpage><pub-id pub-id-type="doi">10.1109/ACCESS.2022.3142918</pub-id></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Areeb</surname><given-names>QM</given-names> </name><name name-style="western"><surname>Nadeem</surname><given-names>M</given-names> </name></person-group><article-title>Deep learning based hand gesture recognition for emergency situation: a study on Indian Sign Language</article-title><conf-name>2021 International Conference on Data Analytics for Business and Industry (ICDABI)</conf-name><conf-date>Oct 25-26, 2021</conf-date><pub-id pub-id-type="doi">10.1109/ICDABI53623.2021.9655842</pub-id></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>da Silva</surname><given-names>DRB</given-names> </name><name name-style="western"><surname>Araujo</surname><given-names>TMU</given-names> </name><name name-style="western"><surname>R&#x00EA;go</surname><given-names>T do</given-names> </name><name name-style="western"><surname>Brand&#x00E3;o</surname><given-names>MAC</given-names> </name></person-group><article-title>A two-stream model based on 3D convolutional neural networks for the recognition of Brazilian sign language in the health 
context</article-title><conf-name>WebMedia &#x2019;20: Proceedings of the Brazilian Symposium on Multimedia and the Web</conf-name><conf-date>Nov 30 to Dec 4, 2020</conf-date><pub-id pub-id-type="doi">10.1145/3428658.3430980</pub-id></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Das</surname><given-names>S</given-names> </name><name name-style="western"><surname>Biswas</surname><given-names>SK</given-names> </name><name name-style="western"><surname>Purkayastha</surname><given-names>B</given-names> </name></person-group><article-title>Indian Sign Language recognition system for emergency words by using shape and deep features</article-title><conf-name>2023 11th International Conference on Internet of Everything, Microwave Engineering, Communication and Networks (IEMECON)</conf-name><conf-date>Feb 10-11, 2023</conf-date><pub-id pub-id-type="doi">10.1109/IEMECON56962.2023.10092312</pub-id></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ko</surname><given-names>SK</given-names> </name><name name-style="western"><surname>Kim</surname><given-names>CJ</given-names> </name><name name-style="western"><surname>Jung</surname><given-names>H</given-names> </name><name name-style="western"><surname>Cho</surname><given-names>C</given-names> </name></person-group><article-title>Neural sign language translation based on human keypoint estimation</article-title><source>Appl Sci (Basel)</source><year>2019</year><volume>9</volume><issue>13</issue><fpage>2683</fpage><pub-id pub-id-type="doi">10.3390/app9132683</pub-id></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Barrientos-Villalta</surname><given-names>GF</given-names> 
</name><name name-style="western"><surname>Quiroz</surname><given-names>P</given-names> </name><name name-style="western"><surname>Ugarte</surname><given-names>W</given-names> </name></person-group><article-title>Peruvian Sign Language recognition using recurrent neural networks</article-title><conf-name>International Conference on Advanced Research in Technologies, Information, Innovation and Sustainability</conf-name><conf-date>Sep 12-15, 2022</conf-date><pub-id pub-id-type="doi">10.1007/978-3-031-20319-0_34</pub-id></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Ram&#x00ED;rez S&#x00E1;nchez</surname><given-names>JE</given-names> </name><name name-style="western"><surname>Rodr&#x00ED;guez</surname><given-names>AA</given-names> </name><name name-style="western"><surname>Mendoza</surname><given-names>MG</given-names> </name></person-group><article-title>Real-time Mexican Sign Language interpretation using CNN and HMM</article-title><conf-name>Mexican International Conference on Artificial Intelligence</conf-name><conf-date>Oct 25-30, 2021</conf-date><pub-id pub-id-type="doi">10.1007/978-3-030-89817-5_4</pub-id></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hisham</surname><given-names>B</given-names> </name><name name-style="western"><surname>Hamouda</surname><given-names>A</given-names> </name></person-group><article-title>Supervised learning classifiers for Arabic gestures recognition using Kinect V2</article-title><source>SN Appl Sci</source><year>2019</year><month>07</month><volume>1</volume><issue>7</issue><fpage>1</fpage><lpage>21</lpage><pub-id pub-id-type="doi">10.1007/s42452-019-0771-2</pub-id></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="confproc"><person-group 
person-group-type="author"><name name-style="western"><surname>Gandhi</surname><given-names>J</given-names> </name><name name-style="western"><surname>Gandhi</surname><given-names>P</given-names> </name><name name-style="western"><surname>Gosar</surname><given-names>A</given-names> </name><name name-style="western"><surname>Chaudhari</surname><given-names>S</given-names> </name></person-group><article-title>Video recognition techniques for Indian Sign Language in healthcare domain</article-title><conf-name>2021 2nd International Conference for Emerging Technology (INCET)</conf-name><conf-date>May 21-23, 2021</conf-date><pub-id pub-id-type="doi">10.1109/INCET51464.2021.9456116</pub-id></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Uchil</surname><given-names>AP</given-names> </name><name name-style="western"><surname>Jha</surname><given-names>S</given-names> </name><name name-style="western"><surname>Sudha</surname><given-names>BG</given-names> </name></person-group><article-title>Vision-based deep learning approach for dynamic Indian Sign Language recognition in healthcare</article-title><conf-name>Computational Vision and Bio-Inspired Computing: ICCVBIC 2019</conf-name><conf-date>Sep 25-26, 2019</conf-date><pub-id pub-id-type="doi">10.1007/978-3-030-37218-7_43</pub-id></nlm-citation></ref><ref id="ref32"><label>32</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Deji Dere</surname><given-names>M</given-names> </name><name name-style="western"><surname>Dere</surname><given-names>RO</given-names> </name><name name-style="western"><surname>Adesina</surname><given-names>A</given-names> </name><name name-style="western"><surname>Yauri</surname><given-names>AR</given-names> </name></person-group><article-title>SmartCall: a real-time, sign language medical emergency 
communicator</article-title><conf-name>2022 5th Information Technology for Education and Development (ITED)</conf-name><conf-date>Nov 1-3, 2022</conf-date><pub-id pub-id-type="doi">10.1109/ITED56637.2022.10051420</pub-id></nlm-citation></ref><ref id="ref33"><label>33</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Sarhan</surname><given-names>NA</given-names> </name><name name-style="western"><surname>El-Sonbaty</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Youssef</surname><given-names>SM</given-names> </name></person-group><article-title>HMM-based Arabic Sign Language recognition using Kinect</article-title><conf-name>2015 Tenth International Conference on Digital Information Management (ICDIM)</conf-name><conf-date>Dec 23-24, 2015</conf-date><pub-id pub-id-type="doi">10.1109/ICDIM.2015.7381873</pub-id></nlm-citation></ref><ref id="ref34"><label>34</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Guo</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Zhao</surname><given-names>J</given-names> </name><name name-style="western"><surname>Ding</surname><given-names>B</given-names> </name><etal/></person-group><article-title>Sign-to-911: emergency call service for sign language users with assistive AR glasses</article-title><conf-name>ACM MobiCom &#x2019;23: Proceedings of the 29th Annual International Conference on Mobile Computing and Networking</conf-name><conf-date>Oct 2-5, 2023</conf-date><pub-id pub-id-type="doi">10.1145/3570361.3613260</pub-id></nlm-citation></ref><ref id="ref35"><label>35</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>S&#x00FC;zg&#x00FC;n</surname><given-names>M</given-names> </name><name 
name-style="western"><surname>&#x00D6;zdemir</surname><given-names>H</given-names> </name><name name-style="western"><surname>Camg&#x00F6;z</surname><given-names>N</given-names> </name><etal/></person-group><article-title>HospiSign: an interactive sign language platform for hearing impaired</article-title><source>J Naval Sci Eng</source><year>2015</year><access-date>2026-03-31</access-date><volume>11</volume><issue>3</issue><fpage>75</fpage><lpage>92</lpage><comment><ext-link ext-link-type="uri" xlink:href="https://www.cihancamgoz.com/pub/suzgun2015eurasia.pdf">https://www.cihancamgoz.com/pub/suzgun2015eurasia.pdf</ext-link></comment></nlm-citation></ref><ref id="ref36"><label>36</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Adithya</surname><given-names>V</given-names> </name><name name-style="western"><surname>Rajesh</surname><given-names>R</given-names> </name></person-group><article-title>Hand gestures for emergency situations: a video dataset based on words from Indian Sign Language</article-title><source>Data Brief</source><year>2020</year><month>08</month><volume>31</volume><fpage>106016</fpage><pub-id pub-id-type="doi">10.1016/j.dib.2020.106016</pub-id><pub-id pub-id-type="medline">32715044</pub-id></nlm-citation></ref><ref id="ref37"><label>37</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ihsan</surname><given-names>MA</given-names> </name><name name-style="western"><surname>Eram</surname><given-names>AF</given-names> </name><name name-style="western"><surname>Nahar</surname><given-names>L</given-names> </name><name name-style="western"><surname>Kadir</surname><given-names>MA</given-names> </name></person-group><article-title>MediSign: an attention-based CNN-BiLSTM approach of classifying word level signs for patient-doctor interaction in hearing impaired community</article-title><source>IEEE 
Access</source><year>2024</year><volume>12</volume><fpage>33803</fpage><lpage>33815</lpage><pub-id pub-id-type="doi">10.1109/ACCESS.2024.3370684</pub-id></nlm-citation></ref><ref id="ref38"><label>38</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Das</surname><given-names>HV</given-names> </name><name name-style="western"><surname>Mohan</surname><given-names>K</given-names> </name><name name-style="western"><surname>Paul</surname><given-names>L</given-names> </name><name name-style="western"><surname>Kumaresan</surname><given-names>S</given-names> </name><name name-style="western"><surname>Nair</surname><given-names>CS</given-names> </name></person-group><article-title>Transforming consulting atmosphere with Indian Sign Language translation</article-title><source>Multimed Tools Appl</source><year>2024</year><volume>83</volume><issue>5</issue><fpage>13543</fpage><lpage>13555</lpage><pub-id pub-id-type="doi">10.1007/s11042-023-15214-2</pub-id></nlm-citation></ref><ref id="ref39"><label>39</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Faisal</surname><given-names>M</given-names> </name><name name-style="western"><surname>Alsulaiman</surname><given-names>M</given-names> </name><name name-style="western"><surname>Mekhtiche</surname><given-names>M</given-names> </name><etal/></person-group><article-title>Enabling two-way communication of deaf using Saudi Sign Language</article-title><source>IEEE Access</source><year>2023</year><volume>11</volume><fpage>135423</fpage><lpage>135434</lpage><pub-id pub-id-type="doi">10.1109/ACCESS.2023.3337514</pub-id></nlm-citation></ref><ref id="ref40"><label>40</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Luqman</surname><given-names>H</given-names> </name><name 
name-style="western"><surname>Mahmoud</surname><given-names>SA</given-names> </name></person-group><article-title>A machine translation system from Arabic Sign Language to Arabic</article-title><source>Univ Access Inf Soc</source><year>2020</year><month>11</month><volume>19</volume><issue>4</issue><fpage>891</fpage><lpage>904</lpage><pub-id pub-id-type="doi">10.1007/s10209-019-00695-6</pub-id></nlm-citation></ref><ref id="ref41"><label>41</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Dewasurendra</surname><given-names>D</given-names> </name><name name-style="western"><surname>Kumar</surname><given-names>A</given-names> </name><name name-style="western"><surname>Perera</surname><given-names>I</given-names> </name><name name-style="western"><surname>Jayasena</surname><given-names>D</given-names> </name><name name-style="western"><surname>Thelijjagoda</surname><given-names>S</given-names> </name></person-group><article-title>Emergency communication application for speech and hearing-impaired citizens</article-title><conf-name>2020 From Innovation to Impact (FITI)</conf-name><conf-date>Dec 15-15, 2020</conf-date><pub-id pub-id-type="doi">10.1109/FITI52050.2020.9424899</pub-id></nlm-citation></ref><ref id="ref42"><label>42</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Bellil</surname><given-names>L</given-names> </name><name name-style="western"><surname>Ghiri</surname><given-names>MG</given-names> </name><name name-style="western"><surname>Boulesnane</surname><given-names>A</given-names> </name></person-group><article-title>Empowering deaf community in healthcare communication: 1D-CNN-based Algerian Sign Language recognition system</article-title><conf-name>2024 6th International Conference on Pattern Analysis and Intelligent Systems (PAIS)</conf-name><conf-date>Apr 24-25, 2024</conf-date><pub-id 
pub-id-type="doi">10.1109/PAIS62114.2024.10541233</pub-id></nlm-citation></ref><ref id="ref43"><label>43</label><nlm-citation citation-type="web"><article-title>Vosk: speech recognition toolkit</article-title><source>Alphacephei</source><access-date>2026-03-24</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://alphacephei.com/vosk/">https://alphacephei.com/vosk/</ext-link></comment></nlm-citation></ref><ref id="ref44"><label>44</label><nlm-citation citation-type="web"><article-title>Arabic language disambiguation for natural language processing applications</article-title><source>Columbia University</source><access-date>2026-03-24</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://inventions.techventures.columbia.edu/technologies/arabic-language-disambiguation--cu14012">https://inventions.techventures.columbia.edu/technologies/arabic-language-disambiguation--cu14012</ext-link></comment></nlm-citation></ref><ref id="ref45"><label>45</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Shotton</surname><given-names>J</given-names> </name><name name-style="western"><surname>Fitzgibbon</surname><given-names>A</given-names> </name><name name-style="western"><surname>Cook</surname><given-names>M</given-names> </name><etal/></person-group><article-title>Real-time human pose recognition in parts from single depth images</article-title><conf-name>Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR)</conf-name><conf-date>Jun 20-25, 2011</conf-date><pub-id pub-id-type="doi">10.1109/CVPR.2011.5995316</pub-id></nlm-citation></ref><ref id="ref46"><label>46</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Debevc</surname><given-names>M</given-names> </name><name name-style="western"><surname>Kosec</surname><given-names>P</given-names> </name><name 
name-style="western"><surname>Holzinger</surname><given-names>A</given-names> </name></person-group><article-title>Improving multimodal web accessibility for deaf people: sign language interpreter module</article-title><source>Multimed Tools Appl</source><year>2011</year><month>08</month><volume>54</volume><issue>1</issue><fpage>181</fpage><lpage>199</lpage><pub-id pub-id-type="doi">10.1007/s11042-010-0529-8</pub-id></nlm-citation></ref><ref id="ref47"><label>47</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Madhiarasan</surname><given-names>M</given-names> </name><name name-style="western"><surname>Roy</surname><given-names>PP</given-names> </name></person-group><article-title>A comprehensive review of sign language recognition: different types, modalities, and datasets</article-title><source>arXiv</source><comment>Preprint posted online on  Apr 7, 2022</comment><pub-id pub-id-type="doi">10.48550/arXiv.2204.03328</pub-id></nlm-citation></ref><ref id="ref48"><label>48</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Haji Mohd</surname><given-names>MN</given-names> </name><name name-style="western"><surname>Mohd Asaari</surname><given-names>MS</given-names> </name><name name-style="western"><surname>Lay Ping</surname><given-names>O</given-names> </name><name name-style="western"><surname>Rosdi</surname><given-names>BA</given-names> </name></person-group><article-title>Vision-based hand detection and tracking using fusion of kernelized correlation filter and single-shot detection</article-title><source>Appl Sci (Basel)</source><year>2023</year><volume>13</volume><issue>13</issue><fpage>7433</fpage><pub-id pub-id-type="doi">10.3390/app13137433</pub-id></nlm-citation></ref><ref id="ref49"><label>49</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Al-Qurishi</surname><given-names>M</given-names> </name><name name-style="western"><surname>Khalid</surname><given-names>T</given-names> </name><name name-style="western"><surname>Souissi</surname><given-names>R</given-names> </name></person-group><article-title>Deep learning for sign language recognition: current techniques, benchmarks, and open issues</article-title><source>IEEE Access</source><year>2021</year><volume>9</volume><fpage>126917</fpage><lpage>126951</lpage><pub-id pub-id-type="doi">10.1109/ACCESS.2021.3110912</pub-id></nlm-citation></ref><ref id="ref50"><label>50</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Yang</surname><given-names>R</given-names> </name><name name-style="western"><surname>Sarkar</surname><given-names>S</given-names> </name><name name-style="western"><surname>Loeding</surname><given-names>B</given-names> </name></person-group><article-title>Handling movement epenthesis and hand segmentation ambiguities in continuous sign language recognition using nested dynamic programming</article-title><source>IEEE Trans Pattern Anal Mach Intell</source><year>2009</year><volume>32</volume><issue>3</issue><fpage>462</fpage><lpage>477</lpage><pub-id pub-id-type="doi">10.1109/TPAMI.2009.26</pub-id></nlm-citation></ref><ref id="ref51"><label>51</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Amin</surname><given-names>MS</given-names> </name><name name-style="western"><surname>Rizvi</surname><given-names>STH</given-names> </name><name name-style="western"><surname>Hossain</surname><given-names>MM</given-names> </name></person-group><article-title>A comparative review on applications of different sensors for sign language recognition</article-title><source>J 
Imaging</source><year>2022</year><month>04</month><day>2</day><volume>8</volume><issue>4</issue><fpage>98</fpage><pub-id pub-id-type="doi">10.3390/jimaging8040098</pub-id><pub-id pub-id-type="medline">35448225</pub-id></nlm-citation></ref><ref id="ref52"><label>52</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Figg</surname><given-names>B</given-names> </name></person-group><article-title>Deafness and other communication disorders</article-title><source>J Consum Health Internet</source><year>2023</year><month>04</month><day>3</day><volume>27</volume><issue>2</issue><fpage>199</fpage><lpage>209</lpage><pub-id pub-id-type="doi">10.1080/15398285.2023.2204717</pub-id></nlm-citation></ref><ref id="ref53"><label>53</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bihari Dash</surname><given-names>D</given-names> </name></person-group><article-title>Significance of globalisation and English language</article-title><source>Int J Stud Engl Lang Lit</source><year>2022</year><volume>10</volume><issue>5</issue><fpage>10</fpage><lpage>16</lpage><pub-id pub-id-type="doi">10.20431/2347-3134.1005002</pub-id></nlm-citation></ref><ref id="ref54"><label>54</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Nye</surname><given-names>B</given-names> </name><name name-style="western"><surname>Jessy Li</surname><given-names>J</given-names> </name><name name-style="western"><surname>Patel</surname><given-names>R</given-names> </name><etal/></person-group><article-title>A corpus with multi-level annotations of patients, interventions and outcomes to support language processing for medical literature</article-title><conf-name>Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</conf-name><conf-date>Jul 15-20, 
2018</conf-date><pub-id pub-id-type="doi">10.18653/v1/P18-1019</pub-id></nlm-citation></ref><ref id="ref55"><label>55</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Zheng</surname><given-names>L</given-names> </name><name name-style="western"><surname>Liang</surname><given-names>B</given-names> </name><name name-style="western"><surname>Jiang</surname><given-names>A</given-names> </name></person-group><article-title>Recent advances of deep learning for sign language recognition</article-title><conf-name>2017 International Conference on Digital Image Computing</conf-name><conf-date>Nov 29 to Dec 1, 2017</conf-date><pub-id pub-id-type="doi">10.1109/DICTA.2017.8227483</pub-id></nlm-citation></ref><ref id="ref56"><label>56</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Jim&#x00E9;nez-Arberas</surname><given-names>E</given-names> </name><name name-style="western"><surname>D&#x00ED;ez</surname><given-names>E</given-names> </name></person-group><article-title>Psychosocial impact of assistive devices and other technologies on deaf and hard of hearing people</article-title><source>Int J Environ Res Public Health</source><year>2021</year><month>07</month><day>7</day><volume>18</volume><issue>14</issue><fpage>7259</fpage><pub-id pub-id-type="doi">10.3390/ijerph18147259</pub-id><pub-id pub-id-type="medline">34299710</pub-id></nlm-citation></ref><ref id="ref57"><label>57</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Adeyanju</surname><given-names>IA</given-names> </name><name name-style="western"><surname>Bello</surname><given-names>OO</given-names> </name><name name-style="western"><surname>Adegboye</surname><given-names>MA</given-names> </name></person-group><article-title>Machine learning methods for sign language recognition: a critical review and 
analysis</article-title><source>Intell Syst Appl</source><year>2021</year><month>11</month><volume>12</volume><fpage>200056</fpage><pub-id pub-id-type="doi">10.1016/j.iswa.2021.200056</pub-id></nlm-citation></ref><ref id="ref58"><label>58</label><nlm-citation citation-type="web"><article-title>Ergonomics of human-system interaction: Part 210: Human-centered design for interactive systems</article-title><source>International Organization for Standardization</source><access-date>2025-04-13</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.iso.org/obp/ui/#iso:std:iso:9241:-210:ed-1:v1:en">https://www.iso.org/obp/ui/#iso:std:iso:9241:-210:ed-1:v1:en</ext-link></comment></nlm-citation></ref><ref id="ref59"><label>59</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Saeed</surname><given-names>ZR</given-names> </name><name name-style="western"><surname>Zainol</surname><given-names>ZB</given-names> </name><name name-style="western"><surname>Zaidan</surname><given-names>BB</given-names> </name><name name-style="western"><surname>Alamoodi</surname><given-names>AH</given-names> </name></person-group><article-title>A systematic review on systems-based sensory gloves for sign language pattern recognition: an update from 2017 to 2022</article-title><source>IEEE Access</source><year>2022</year><volume>10</volume><fpage>123358</fpage><lpage>123377</lpage><pub-id pub-id-type="doi">10.1109/ACCESS.2022.3219430</pub-id></nlm-citation></ref><ref id="ref60"><label>60</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ko&#x017E;uh</surname><given-names>I</given-names> </name><name name-style="western"><surname>Hintermair</surname><given-names>M</given-names> </name><name name-style="western"><surname>Holzinger</surname><given-names>A</given-names> </name><name 
name-style="western"><surname>Vol&#x010D;i&#x010D;</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Debevc</surname><given-names>M</given-names> </name></person-group><article-title>Enhancing universal access: deaf and hard of hearing people on social networking sites</article-title><source>Univ Access Inf Soc</source><year>2015</year><month>11</month><volume>14</volume><issue>4</issue><fpage>537</fpage><lpage>545</lpage><pub-id pub-id-type="doi">10.1007/s10209-014-0354-3</pub-id></nlm-citation></ref><ref id="ref61"><label>61</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Boggaram</surname><given-names>A</given-names> </name><name name-style="western"><surname>Boggaram</surname><given-names>A</given-names> </name><name name-style="western"><surname>Sharma</surname><given-names>A</given-names> </name><name name-style="western"><surname>Srinivasa Ramanujan</surname><given-names>A</given-names> </name><name name-style="western"><surname>Bharathi</surname><given-names>R</given-names> </name></person-group><article-title>Sign language translation systems</article-title><source>Int J Softw Sci Comput Intell</source><year>2022</year><month>01</month><volume>14</volume><issue>1</issue><fpage>1</fpage><lpage>33</lpage><pub-id pub-id-type="doi">10.4018/IJSSCI.311448</pub-id></nlm-citation></ref><ref id="ref62"><label>62</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Waghmare</surname><given-names>PP</given-names> </name></person-group><article-title>Techniques for generating sign language: a comprehensive review</article-title><source>J Inst Eng India Ser B</source><year>2024</year><month>12</month><volume>105</volume><issue>6</issue><fpage>1789</fpage><lpage>1803</lpage><pub-id pub-id-type="doi">10.1007/s40031-024-01118-8</pub-id></nlm-citation></ref><ref id="ref63"><label>63</label><nlm-citation 
citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Rowley</surname><given-names>K</given-names> </name><name name-style="western"><surname>Cormier</surname><given-names>K</given-names> </name></person-group><article-title>Accent or not? Language attitudes towards regional variation in British Sign Language</article-title><source>Appl Linguist Rev</source><year>2023</year><month>07</month><day>26</day><volume>14</volume><issue>4</issue><fpage>919</fpage><lpage>943</lpage><pub-id pub-id-type="doi">10.1515/applirev-2020-0144</pub-id></nlm-citation></ref><ref id="ref64"><label>64</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Preece</surname><given-names>J</given-names> </name><name name-style="western"><surname>Rogers</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Sharp</surname><given-names>H</given-names> </name></person-group><source>Interaction Design: Beyond Human-Computer Interaction</source><year>2023</year><edition>6</edition><publisher-name>John Wiley &#x0026; Sons</publisher-name><pub-id pub-id-type="other">978-1-119-90109-9</pub-id></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>Search strategy.</p><media xlink:href="jmir_v28i1e70417_app1.docx" xlink:title="DOCX File, 2988 KB"/></supplementary-material><supplementary-material id="app2"><label>Multimedia Appendix 2</label><p>The definitions used regarding the type of sign language recognition system, the corpus formation, and the health context.</p><media xlink:href="jmir_v28i1e70417_app2.docx" xlink:title="DOCX File, 17 KB"/></supplementary-material><supplementary-material id="app3"><label>Multimedia Appendix 3</label><p>Codebook.</p><media xlink:href="jmir_v28i1e70417_app3.docx" xlink:title="DOCX File, 2489 KB"/></supplementary-material><supplementary-material 
id="app4"><label>Multimedia Appendix 4</label><p>Studies not retrieved.</p><media xlink:href="jmir_v28i1e70417_app4.docx" xlink:title="DOCX File, 14 KB"/></supplementary-material><supplementary-material id="app5"><label>Multimedia Appendix 5</label><p>Development and testing of different sign language recognition systems.</p><media xlink:href="jmir_v28i1e70417_app5.docx" xlink:title="DOCX File, 30 KB"/></supplementary-material><supplementary-material id="app6"><label>Multimedia Appendix 6</label><p>Technologies necessary for sign language recognition system implementation.</p><media xlink:href="jmir_v28i1e70417_app6.docx" xlink:title="DOCX File, 16 KB"/></supplementary-material><supplementary-material id="app7"><label>Checklist 1</label><p>PRISMA checklist.</p><media xlink:href="jmir_v28i1e70417_app7.docx" xlink:title="DOCX File, 32 KB"/></supplementary-material></app-group></back></article>