<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMIR</journal-id>
      <journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id>
      <journal-title>Journal of Medical Internet Research</journal-title>
      <issn pub-type="epub">1438-8871</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v26i1e58278</article-id>
      <article-id pub-id-type="pmid">39302714</article-id>
      <article-id pub-id-type="doi">10.2196/58278</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Evaluating a Natural Language Processing–Driven, AI-Assisted International Classification of Diseases, 10th Revision, Clinical Modification, Coding System for Diagnosis Related Groups in a Real Hospital Environment: Algorithm Development and Validation Study</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Leung</surname>
            <given-names>Tiffany</given-names>
          </name>
        </contrib>
        <contrib contrib-type="editor">
          <name>
            <surname>Xie</surname>
            <given-names>Feng</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Chen</surname>
            <given-names>Pei-fu</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Mistry</surname>
            <given-names>Jinal</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Southern</surname>
            <given-names>Danielle A</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author">
          <name name-style="western">
            <surname>Dai</surname>
            <given-names>Hong-Jie</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff01" ref-type="aff">1</xref>
          <xref rid="aff02" ref-type="aff">2</xref>
          <xref rid="aff03" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-1516-7255</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Wang</surname>
            <given-names>Chen-Kai</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff01" ref-type="aff">1</xref>
          <xref rid="aff04" ref-type="aff">4</xref>
          <xref rid="aff05" ref-type="aff">5</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-1830-2915</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Chen</surname>
            <given-names>Chien-Chang</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff06" ref-type="aff">6</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0008-0103-3089</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Liou</surname>
            <given-names>Chong-Sin</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff01" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0009-8024-9589</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Lu</surname>
            <given-names>An-Tai</given-names>
          </name>
          <degrees>MS</degrees>
          <xref rid="aff07" ref-type="aff">7</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0003-5424-1256</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author">
          <name name-style="western">
            <surname>Lai</surname>
            <given-names>Chia-Hsin</given-names>
          </name>
          <degrees>BS</degrees>
          <xref rid="aff01" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0003-9533-140X</ext-link>
        </contrib>
        <contrib id="contrib7" contrib-type="author">
          <name name-style="western">
            <surname>Shain</surname>
            <given-names>Bo-Tsz</given-names>
          </name>
          <degrees>BS</degrees>
          <xref rid="aff01" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0009-6560-8416</ext-link>
        </contrib>
        <contrib id="contrib8" contrib-type="author">
          <name name-style="western">
            <surname>Ke</surname>
            <given-names>Cheng-Rong</given-names>
          </name>
          <degrees>BS</degrees>
          <xref rid="aff01" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0002-2715-4707</ext-link>
        </contrib>
        <contrib id="contrib9" contrib-type="author">
          <name name-style="western">
            <surname>Wang</surname>
            <given-names>William Yu Chung</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff08" ref-type="aff">8</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-1279-2127</ext-link>
        </contrib>
        <contrib id="contrib10" contrib-type="author">
          <name name-style="western">
            <surname>Mir</surname>
            <given-names>Tatheer Hussain</given-names>
          </name>
          <degrees>MS</degrees>
          <xref rid="aff01" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0007-8409-8189</ext-link>
        </contrib>
        <contrib id="contrib11" contrib-type="author">
          <name name-style="western">
            <surname>Simanjuntak</surname>
            <given-names>Mutiara</given-names>
          </name>
          <degrees>MS</degrees>
          <xref rid="aff01" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-8024-6374</ext-link>
        </contrib>
        <contrib id="contrib12" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Kao</surname>
            <given-names>Hao-Yun</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff09" ref-type="aff">9</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-9390-9314</ext-link>
        </contrib>
        <contrib id="contrib13" contrib-type="author" corresp="yes" equal-contrib="yes">
          <name name-style="western">
            <surname>Tsai</surname>
            <given-names>Ming-Ju</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff10" ref-type="aff">10</xref>
          <address>
            <institution>Division of Pulmonary and Critical Care Medicine</institution>
            <institution>Department of Internal Medicine</institution>
            <institution>Kaohsiung Medical University Hospital, Kaohsiung Medical University</institution>
            <addr-line>No 100, Tzyou 1st Road</addr-line>
            <addr-line>Sanmin District</addr-line>
            <addr-line>Kaohsiung, 80756</addr-line>
            <country>Taiwan</country>
            <phone>886 73121101 ext 4660035</phone>
            <email>mjt@kmu.edu.tw</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-3621-3334</ext-link>
        </contrib>
        <contrib id="contrib14" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Tseng</surname>
            <given-names>Vincent S</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff04" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-4853-1594</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff01">
        <label>1</label>
        <institution>Intelligent System Lab, College of Electrical Engineering and Computer Science</institution>
        <institution>Department of Electrical Engineering</institution>
        <institution>National Kaohsiung University of Science and Technology</institution>
        <addr-line>Kaohsiung</addr-line>
        <country>Taiwan</country>
      </aff>
      <aff id="aff02">
        <label>2</label>
        <institution>National Institute of Cancer Research</institution>
        <institution>National Health Research Institutes</institution>
        <addr-line>Tainan</addr-line>
        <country>Taiwan</country>
      </aff>
      <aff id="aff03">
        <label>3</label>
        <institution>Center for Big Data Research</institution>
        <institution>Kaohsiung Medical University</institution>
        <addr-line>Kaohsiung</addr-line>
        <country>Taiwan</country>
      </aff>
      <aff id="aff04">
        <label>4</label>
        <institution>Department of Computer Science</institution>
        <institution>National Yang Ming Chiao Tung University</institution>
        <addr-line>Hsinchu</addr-line>
        <country>Taiwan</country>
      </aff>
      <aff id="aff05">
        <label>5</label>
        <institution>Advanced Technology Laboratory</institution>
        <institution>Chunghwa Telecom Laboratories</institution>
        <addr-line>Taoyuan</addr-line>
        <country>Taiwan</country>
      </aff>
      <aff id="aff06">
        <label>6</label>
        <institution>Electromagnetic Sensing Control and AI Computing System Laboratory</institution>
        <institution>Department of Electrical Engineering, College of Electrical Engineering and Computer Science</institution>
        <institution>National Kaohsiung University of Science and Technology</institution>
        <addr-line>Kaohsiung</addr-line>
        <country>Taiwan</country>
      </aff>
      <aff id="aff07">
        <label>7</label>
        <institution>School of Post-Baccalaureate Medicine</institution>
        <institution>Kaohsiung Medical University</institution>
        <addr-line>Kaohsiung</addr-line>
        <country>Taiwan</country>
      </aff>
      <aff id="aff08">
        <label>8</label>
        <institution>Waikato Management School</institution>
        <institution>University of Waikato</institution>
        <addr-line>Hamilton</addr-line>
        <country>New Zealand</country>
      </aff>
      <aff id="aff09">
        <label>9</label>
        <institution>Department of Healthcare Administration and Medical Informatics</institution>
        <institution>College of Health Sciences</institution>
        <institution>Kaohsiung Medical University</institution>
        <addr-line>Kaohsiung</addr-line>
        <country>Taiwan</country>
      </aff>
      <aff id="aff10">
        <label>10</label>
        <institution>Division of Pulmonary and Critical Care Medicine</institution>
        <institution>Department of Internal Medicine</institution>
        <institution>Kaohsiung Medical University Hospital, Kaohsiung Medical University</institution>
        <addr-line>Kaohsiung</addr-line>
        <country>Taiwan</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Ming-Ju Tsai <email>mjt@kmu.edu.tw</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <year>2024</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>20</day>
        <month>9</month>
        <year>2024</year>
      </pub-date>
      <volume>26</volume>
      <elocation-id>e58278</elocation-id>
      <history>
        <date date-type="received">
          <day>13</day>
          <month>3</month>
          <year>2024</year>
        </date>
        <date date-type="rev-request">
          <day>24</day>
          <month>5</month>
          <year>2024</year>
        </date>
        <date date-type="rev-recd">
          <day>28</day>
          <month>6</month>
          <year>2024</year>
        </date>
        <date date-type="accepted">
          <day>17</day>
          <month>7</month>
          <year>2024</year>
        </date>
      </history>
      <copyright-statement>©Hong-Jie Dai, Chen-Kai Wang, Chien-Chang Chen, Chong-Sin Liou, An-Tai Lu, Chia-Hsin Lai, Bo-Tsz Shain, Cheng-Rong Ke, William Yu Chung Wang, Tatheer Hussain Mir, Mutiara Simanjuntak, Hao-Yun Kao, Ming-Ju Tsai, Vincent S Tseng. Originally published in the Journal of Medical Internet Research (https://www.jmir.org), 20.09.2024.</copyright-statement>
      <copyright-year>2024</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research (ISSN 1438-8871), is properly cited. The complete bibliographic information, a link to the original publication on https://www.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://www.jmir.org/2024/1/e58278" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>International Classification of Diseases codes are widely used to describe diagnosis information, but manual coding relies heavily on human interpretation, which can be expensive, time consuming, and prone to errors. With the transition from the <italic>International Classification of Diseases, Ninth Revision</italic>, to the <italic>International Classification of Diseases, Tenth Revision</italic> (<italic>ICD-10</italic>), the coding process has become more complex, highlighting the need for automated approaches to enhance coding efficiency and accuracy. Inaccurate coding can result in substantial financial losses for hospitals, and a precise assessment of outcomes generated by a natural language processing (NLP)–driven autocoding system thus assumes a critical role in safeguarding the accuracy of the Taiwan diagnosis related groups (Tw-DRGs).</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>This study aims to evaluate the feasibility of applying an <italic>International Classification of Diseases, Tenth Revision, Clinical Modification</italic> (<italic>ICD-10-CM</italic>), autocoding system that can automatically determine diagnoses and codes based on free-text discharge summaries to facilitate the assessment of Tw-DRGs, specifically principal diagnosis and major diagnostic categories (MDCs).</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>By using the patient discharge summaries from Kaohsiung Medical University Chung-Ho Memorial Hospital (KMUCHH) from April 2019 to December 2020 as a reference data set, we developed artificial intelligence (AI)–assisted <italic>ICD-10-CM</italic> coding systems based on deep learning models. We constructed a web-based user interface for the AI-assisted coding system and deployed the system into the workflow of the certified coding specialists (CCSs) of KMUCHH. The data used for the assessment of Tw-DRGs were manually curated by a CCS, with the principal diagnosis and MDC determined from discharge summaries collected at KMUCHH from February 2023 to April 2023.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>Both the reference data set and real hospital data were used to assess performance in determining <italic>ICD-10-CM</italic> coding, principal diagnosis, and MDC for Tw-DRGs. Among all methods, the GPT-2 (OpenAI)-based model achieved the highest <italic>F</italic><sub>1</sub>-score, 0.667 (<italic>F</italic><sub>1</sub>-score 0.851 for the top 50 codes), on the KMUCHH test set and a slightly lower <italic>F</italic><sub>1</sub>-score, 0.621, in real hospital data. Cohen κ evaluation for the agreement of MDC between the models and the CCS revealed that the overall average κ value for GPT-2 (κ=0.714) was approximately 12.2 percentage points higher than that of the hierarchy attention network (κ=0.592). GPT-2 demonstrated superior agreement with the CCS across 6 categories of MDC, with an average κ value of approximately 0.869 (SD 0.033), underscoring the effectiveness of the developed AI-assisted coding system in supporting the work of CCSs.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>An NLP-driven AI-assisted coding system can assist CCSs in <italic>ICD-10-CM</italic> coding by offering coding references via a user interface, demonstrating the potential to reduce the manual workload and expedite Tw-DRG assessment. Consistency in performance affirmed the effectiveness of the system in supporting CCSs in <italic>ICD-10-CM</italic> coding and the judgment of Tw-DRGs.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>natural language processing</kwd>
        <kwd>International Classification of Diseases</kwd>
        <kwd>deep learning</kwd>
        <kwd>electronic medical record</kwd>
        <kwd>Taiwan diagnosis related groups</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <sec>
        <title>Background</title>
        <p>The International Classification of Diseases (ICD) [<xref ref-type="bibr" rid="ref1">1</xref>], established by the World Health Organization, is a crucial medical classification system that defines the universe of diseases, disorders, injuries, and other related health conditions. Since its first publication in 1893, the ICD has become a widely adopted standard across various health care facilities and settings globally, providing consistency and accuracy in disease diagnosis and classification. In 1992, the World Health Organization published the <italic>International Classification of Diseases, Tenth Revision</italic> (<italic>ICD-10</italic>), which has since been widely adopted worldwide [<xref ref-type="bibr" rid="ref2">2</xref>,<xref ref-type="bibr" rid="ref3">3</xref>]. Many countries extended and customized the <italic>ICD-10</italic> classification system for their country-specific reporting purposes, such as <italic>International Classification of Diseases, Tenth Revision, Clinical Modification</italic> (<italic>ICD-10-CM</italic>) in the United States, <italic>International Classification of Diseases, Tenth Revision, Canadian Modification</italic> in Canada, <italic>International Classification of Diseases, Tenth Revision, German Modification</italic> in Germany, and <italic>International Classification of Diseases, Tenth Revision, Australian Modification</italic> that is followed by Australia as well as 15 other countries including Ireland, Singapore, and Saudi Arabia.</p>
        <p>The <italic>ICD-10-CM</italic> is an ICD system that classifies patients according to the type of illness, severity, and the location of the disease that was developed to describe more clinical details with the increasing number of diagnoses and procedural codes applied in payment methodologies [<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref5">5</xref>]. As a result, the <italic>ICD-10-CM</italic> coding task has become a crucial element in various fields, such as disease surveillance [<xref ref-type="bibr" rid="ref6">6</xref>], health services management [<xref ref-type="bibr" rid="ref7">7</xref>], and clinical research [<xref ref-type="bibr" rid="ref8">8</xref>]. For the National Health Insurance Administration (NHIA) in Taiwan, <italic>ICD-10-CM</italic> holds immense significance by serving as a standardized coding system for the statistical analysis of disease diagnosis, surgical treatment for patients admitted to hospital, and payment of health insurance. In 2016, the NHIA in Taiwan followed the global trend and transitioned from the <italic>International Classification of Diseases, Ninth Revision, Clinical Modification</italic> (<italic>ICD-9</italic>-<italic>CM</italic>), to <italic>ICD-10-CM</italic>, which expanded the number of codes available. Previously, the <italic>ICD-9</italic>-<italic>CM</italic> contained approximately 14,000 diagnosis codes, while the <italic>ICD-10-CM</italic> had approximately 69,000 diagnosis codes [<xref ref-type="bibr" rid="ref5">5</xref>]. The NHIA is currently using the 2014 version of <italic>ICD-10-CM</italic>, which consists of approximately 71,900 diagnosis codes. Nowadays, many medical institutions are relying on licensed certified coding specialists (CCSs) to manually assign <italic>ICD-10</italic> codes to inpatients. These coders spend a significant amount of time reviewing various medical materials to accurately diagnose and code each patient’s condition. 
Due to the complexity of the <italic>ICD-10</italic> structure and coding rules, the task of coding is significantly more labor-intensive and time-consuming than <italic>ICD-9</italic>, even when performed by a skilled CCS who typically dedicates approximately 30 minutes per case on average. In response to these challenges, investigators [<xref ref-type="bibr" rid="ref9">9</xref>-<xref ref-type="bibr" rid="ref11">11</xref>] have applied both rule-based algorithms and machine learning methods, such as recurrent neural network, long short-term memory, and bidirectional encoder representations from transformers (BERT), to classify patients with specific conditions. However, the performance of these approaches remains limited.</p>
        <p>By contrast, to control the rising health care costs, health authorities in many countries have implemented the diagnosis related group (DRG) payment system. The NHIA in Taiwan has implemented the Taiwan DRG (Tw-DRG) payment systems since 2010 [<xref ref-type="bibr" rid="ref12">12</xref>] to consolidate related DRGs for the purpose of determining payment standards [<xref ref-type="bibr" rid="ref13">13</xref>]. Tw-DRGs are classified by major diagnostic categories (MDCs) that depend on the assigned <italic>ICD-10-CM</italic> codes. Consequently, accurate <italic>ICD-10-CM</italic> coding is critical for the accurate generation of Tw-DRGs, as coding errors can lead to inappropriate treatment options, delayed reimbursement processes, and significant financial losses for hospitals [<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref15">15</xref>]. Factors including incomplete information, incorrect data entry, and insufficient coder expertise can lead to inaccurate coding [<xref ref-type="bibr" rid="ref16">16</xref>]. In addition, errors may also arise from incorrect human perception [<xref ref-type="bibr" rid="ref17">17</xref>], the complex technical nature of the work [<xref ref-type="bibr" rid="ref18">18</xref>], and human fatigue from heavy workloads [<xref ref-type="bibr" rid="ref19">19</xref>]. While natural language processing (NLP)–driven autocoding systems have the potential to enhance the quality of the manual coding results and expedite code assignment, it is imperative to assess their accuracy to ensure cost savings for the hospitals [<xref ref-type="bibr" rid="ref20">20</xref>]. In summary, precise coding according to the <italic>ICD-10-CM</italic> system is essential for accurately categorizing Tw-DRGs, as any fault in this process can lead to misclassification and subsequently impact health care reimbursements.</p>
      </sec>
      <sec>
        <title>Study Overview</title>
        <p>In this study, we developed 2 deep learning–based models, the hierarchical attention network (HAN) and the GPT-2 (OpenAI), in the manner of multi-label supervised learning for <italic>ICD-10-CM</italic> coding. The former is a conventional classification model, and the latter is categorized as a generative model.</p>
        <p>To facilitate the coding process in the real hospital environment, we established an NLP-driven <italic>ICD-10-CM</italic> autocoding system. This system includes a user-friendly visual interface to display the predicted coding results, CIs, and relevant medical record keywords. We integrated the system into the coding procedure established at Kaohsiung Medical University Chung-Ho Memorial Hospital (KMUCHH) to assist the workflow of clinical coders and improve the efficiency of disease coding. We then compared the consistency of the principal diagnosis and MDC coding between the autocoding system and the data curated by the CCS.</p>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Data Set and Task Definition</title>
        <sec>
          <title>Data Collection and Preparation</title>
          <p>This study used a total of 136,841 unstructured discharge summaries of patients who were hospitalized, recorded in the KMUCHH from April 1, 2019, to December 31, 2020, as the primary data source for the development of the <italic>ICD-10-CM</italic> coding system. After deploying the developed system in the workflow of the KMUCHH CCSs, we collected an additional data set containing 2632 discharge cases processed by the system from February 2023 to April 2023 to assess the performance of the <italic>ICD-10-CM</italic> coding system in the real hospital environment and the feasibility of assisting the process of Tw-DRGs. The original data set contains 15,756 discharge cases. After excluding 7541 (47.86%) non–Tw-DRGs cases and 5583 (35.43%) cases with incomplete electronic medical records (EMRs), a data set of 2632 (16.7%) discharge cases was used for our evaluation.</p>
          <p>To maximize the amount of data available for the training phase of our system, we composed a test set by selecting the latest 1000 discharge summaries from the raw data sorted by time stamp. The remaining summaries were then allocated for use as the training set. During the training phase, we randomly sampled 5% (6842/136,841) of the discharge summaries to form the validation set consisting of 6842 summaries. As depicted in <xref ref-type="table" rid="table1">Table 1</xref>, a total of 129,000 (94.27%) of 136,841 discharge summaries were assigned to the training set (comprising a total of 567,957 <italic>ICD-10-CM</italic> codes, with 11,494 (2.02%) being unique), 6842 (5%) discharge summaries were assigned to the validation set (with a total of 36,205 codes, including 4038 (11.15%) unique ones), and 1000 (0.73%) discharge summaries were included in the test set with a total of 10,412 codes and 1482 (14.23%) of them being unique.</p>
          <table-wrap position="float" id="table1">
            <label>Table 1</label>
            <caption>
              <p>Prevalence of the 2014 International Classification of Diseases, Tenth Revision, Clinical Modification codes used in the compiled data set consisting of 136,841 discharge summaries.</p>
            </caption>
            <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
              <col width="110"/>
              <col width="100"/>
              <col width="270"/>
              <col width="170"/>
              <col width="170"/>
              <col width="180"/>
              <thead>
                <tr valign="top">
                  <td>Chapters</td>
                  <td>Blocks</td>
                  <td>Definitions</td>
                  <td>Codes in training set (n=567,957), n (%)</td>
                  <td>Codes in validation set (n=36,205), n (%)</td>
                  <td>Codes in test set (n=10,412), n (%)</td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td>I</td>
                  <td>A00-B99</td>
                  <td>Certain infectious and parasitic diseases</td>
                  <td>35,708 (6.29)</td>
                  <td>2233 (6.17)</td>
                  <td>341 (3.28)</td>
                </tr>
                <tr valign="top">
                  <td>II</td>
                  <td>C00-D48</td>
                  <td>Neoplasms</td>
                  <td>82,248 (14.48)</td>
                  <td>4838 (13.36)</td>
                  <td>827 (7.94)</td>
                </tr>
                <tr valign="top">
                  <td>III</td>
                  <td>D50-D89</td>
                  <td>Diseases of the blood and blood-forming organs and certain disorders involving the immune mechanism</td>
                  <td>17,347 (3.05)</td>
                  <td>1281 (3.54)</td>
                  <td>168 (1.61)</td>
                </tr>
                <tr valign="top">
                  <td>IV</td>
                  <td>E00-E90</td>
                  <td>Endocrine, nutritional, and metabolic diseases</td>
                  <td>63,545 (11.19)</td>
                  <td>4077 (11.26)</td>
                  <td>301 (2.89)</td>
                </tr>
                <tr valign="top">
                  <td>V</td>
                  <td>F00-F99</td>
                  <td>Mental and behavioral disorders</td>
                  <td>8835 (1.56)</td>
                  <td>565 (1.56)</td>
                  <td>289 (2.78)</td>
                </tr>
                <tr valign="top">
                  <td>VI</td>
                  <td>G00-G99</td>
                  <td>Diseases of the nervous system</td>
                  <td>13,236 (2.33)</td>
                  <td>835 (2.31)</td>
                  <td>395 (3.79)</td>
                </tr>
                <tr valign="top">
                  <td>VII</td>
                  <td>H00-H59</td>
                  <td>Diseases of the eye and adnexa</td>
                  <td>8520 (1.5)</td>
                  <td>421 (1.16)</td>
                  <td>661 (6.35)</td>
                </tr>
                <tr valign="top">
                  <td>VIII</td>
                  <td>H60-H95</td>
                  <td>Diseases of the ear and mastoid process</td>
                  <td>1304 (0.23)</td>
                  <td>120 (0.33)</td>
                  <td>193 (1.85)</td>
                </tr>
                <tr valign="top">
                  <td>IX</td>
                  <td>I00-I99</td>
                  <td>Diseases of the circulatory system</td>
                  <td>82,005 (14.44)</td>
                  <td>5066 (13.99)</td>
                  <td>636 (6.11)</td>
                </tr>
                <tr valign="top">
                  <td>X</td>
                  <td>J00-J99</td>
                  <td>Diseases of the respiratory system</td>
                  <td>31,233 (5.5)</td>
                  <td>2091 (5.78)</td>
                  <td>285 (2.74)</td>
                </tr>
                <tr valign="top">
                  <td>XI</td>
                  <td>K00-K93</td>
                  <td>Diseases of the digestive system</td>
                  <td>48,827 (8.6)</td>
                  <td>3098 (8.56)</td>
                  <td>514 (4.94)</td>
                </tr>
                <tr valign="top">
                  <td>XII</td>
                  <td>L00-L99</td>
                  <td>Diseases of the skin and subcutaneous tissue</td>
                  <td>7469 (1.32)</td>
                  <td>488 (1.35)</td>
                  <td>372 (3.57)</td>
                </tr>
                <tr valign="top">
                  <td>XIII</td>
                  <td>M00-M99</td>
                  <td>Diseases of the musculoskeletal system and connective tissue</td>
                  <td>20,312 (3.58)</td>
                  <td>1296 (3.58)</td>
                  <td>1191 (11.44)</td>
                </tr>
                <tr valign="top">
                  <td>XIV</td>
                  <td>N00-N99</td>
                  <td>Diseases of the genitourinary system</td>
                  <td>39,504 (6.96)</td>
                  <td>2498 (6.9)</td>
                  <td>328 (3.15)</td>
                </tr>
                <tr valign="top">
                  <td>XV</td>
                  <td>O00-O99</td>
                  <td>Pregnancy, childbirth, and the puerperium</td>
                  <td>3887 (0.68)</td>
                  <td>255 (0.70)</td>
                  <td>394 (3.78)</td>
                </tr>
                <tr valign="top">
                  <td>XVI</td>
                  <td>P00-P96</td>
                  <td>Certain conditions originating in the perinatal period</td>
                  <td>4297 (0.76)</td>
                  <td>271 (0.75)</td>
                  <td>202 (1.94)</td>
                </tr>
                <tr valign="top">
                  <td>XVII</td>
                  <td>Q00-Q99</td>
                  <td>Congenital malformations, deformations, and chromosomal abnormalities</td>
                  <td>2918 (0.51)</td>
                  <td>171 (0.47)</td>
                  <td>315 (3.03)</td>
                </tr>
                <tr valign="top">
                  <td>XVIII</td>
                  <td>R00-R99</td>
                  <td>Symptoms, signs, and abnormal clinical and laboratory findings, not elsewhere classified</td>
                  <td>24,544 (4.32)</td>
                  <td>1636 (4.52)</td>
                  <td>392 (3.76)</td>
                </tr>
                <tr valign="top">
                  <td>XIX</td>
                  <td>S00-T98</td>
                  <td>Injury, poisoning, and certain other consequences of external causes</td>
                  <td>20,850 (3.67)</td>
                  <td>1783 (4.92)</td>
                  <td>1661 (15.95)</td>
                </tr>
                <tr valign="top">
                  <td>XX</td>
                  <td>V01-Y98</td>
                  <td>External causes of morbidity and mortality</td>
                  <td>11,791 (2.08)</td>
                  <td>711 (1.96)</td>
                  <td>530 (5.09)</td>
                </tr>
                <tr valign="top">
                  <td>XXI</td>
                  <td>Z00-Z99</td>
                  <td>Factors influencing health status and contact with health services</td>
                  <td>39,577 (6.97)</td>
                  <td>2471 (6.83)</td>
                  <td>417 (4)</td>
                </tr>
              </tbody>
            </table>
          </table-wrap>
        </sec>
        <sec>
          <title>ICD-10-CM Coding Task Formulation</title>
          <p>The <italic>ICD-10-CM</italic> coding task presented in this study aims to develop an NLP autocoding system for generating <italic>ICD-10-CM</italic> codes from a patient’s discharge summary. An <italic>ICD-10-CM</italic> code consists of 3 to 7 characters, and each code begins with an alphabetic character that signifies the relevant classification chapter. The first 3 characters in the code designate the category of the diagnosis, while the subsequent 3 characters correspond to the related etiology. The seventh character provides the related extensions. As shown in <xref ref-type="table" rid="table1">Table 1</xref>, there are 21 chapters in <italic>ICD-10-CM</italic>. The ground-truth <italic>ICD-10-CM</italic> codes for the compiled 136,841 discharge summaries were annotated by specialized CCSs at KMUCHH, culminating in a grand total of 11,653 unique codes. <xref ref-type="boxed-text" rid="box1">Textbox 1</xref> displays an example of the discharge summary from the KMUCHH. The output of the coding system for each summary encompasses a main code along with multiple other codes, thereby formulating the task as a multi-labeling classification problem. Within our data set, the unique count for main codes and other codes is 5835 and 10,393, respectively. Moreover, the generated main codes will serve as the principal diagnosis, while the other codes will be used as secondary diagnoses in the Tw-DRGs estimation process.</p>
          <boxed-text id="box1" position="float">
            <title>An example of the discharge summary from Kaohsiung Medical University Chung-Ho Memorial Hospital.</title>
            <p>Chief complaint:</p>
            <p>Acute urine retention after discharge today</p>
            <p>Impression on Admission:</p>
            <p>Acute urine retention.</p>
            <p>Bladder stone s/p op.</p>
            <p>Hypertension.</p>
            <p>Discharge Diagnosis:</p>
            <p>--underlying--</p>
            <p>#Acute urine retention.</p>
            <p>#Hypertension.</p>
            <p>#Hyperlipidemia</p>
            <p>History on Admission:</p>
            <p>This 71 y/o male has history of hypertension and under regular medical control. He was just discharged from our ward bladder stone and accepted surgical intervention (Endoscopic cystolithotripsy) in last admission. This time, he ...</p>
          </boxed-text>
        </sec>
      </sec>
      <sec>
        <title>Data Preprocessing</title>
        <p>During the preprocessing stage, delimiters (eg, “--underlying--” shown in <xref ref-type="boxed-text" rid="box1">Textbox 1</xref>) and prefix symbols (such as “#”) in the unstructured discharge summaries were filtered out. Subsequently, the clinical NLP toolkit was leveraged to preprocess the summaries by applying sentence splitting and tokenization [<xref ref-type="bibr" rid="ref21">21</xref>].</p>
      </sec>
      <sec>
        <title>Deep Learning Models</title>
        <p>As an effective approach to the coding task, we formulated it as a multi-label supervised learning problem and applied 2 deep learning–based methods to the compiled data set. The first model is grounded in HAN, a neural network architecture specifically crafted to tackle the complexities of modeling hierarchical structures within text data [<xref ref-type="bibr" rid="ref22">22</xref>]. HAN leverages attention mechanisms to capture fine-grained information at both the document and sentence levels. The second model is a generative network built upon GPT-2, a causal language model released by OpenAI that was pretrained on large-scale text data [<xref ref-type="bibr" rid="ref23">23</xref>]. The architecture also features an attention mechanism that enables it to comprehend and generate natural language. HAN was selected for its capacity to provide high readability and interpretability of text through visualization [<xref ref-type="bibr" rid="ref24">24</xref>]. It leverages hierarchical text representation and attention mechanisms to effectively highlight important words and sentences at multiple levels of the text. This enhances the interpretability of the model to allow a better understanding of its internal workings and gain insights into the model’s decision-making process with a strengthened trust in its outputs [<xref ref-type="bibr" rid="ref25">25</xref>]. Furthermore, considering the growing significance of generative artificial intelligence (AI) in research and landing applications, large language models such as GPT-2 are poised to bring about significant transformation in clinical medicine and health care and will be ubiquitous in these fields. However, concerns regarding data sensitivity, inference speed, hardware requirements, as well as the ease of deployment and difficulty of maintenance of the system are paramount [<xref ref-type="bibr" rid="ref26">26</xref>]. In light of these considerations, we opted for the GPT-2 model due to its balance of performance and practicality in our real clinical environment setting.</p>
        <p>Our HAN implementation adheres to the original network architecture and is tailored specifically for the multi-labeling classification task of <italic>ICD-10-CM</italic> codes. The customization involves the application of a fully connected layer to transform the attention results of HAN into the desired number of target ICD codes as illustrated in <xref rid="figure1" ref-type="fig">Figure 1</xref>. The process begins in the embedding layer, where we use pretrained global vector embeddings containing 300D vectors trained on a corpus of 6 billion tokens [<xref ref-type="bibr" rid="ref27">27</xref>] to extract essential information from the textual data. Following the embedding layer, we execute word-level and sentence-level encoding procedures, which use an attention mechanism to capture crucial words and sentences within the text. Subsequently, we implement a fully connected layer to generate a set of 11,653 unique <italic>ICD-10-CM</italic> codes. Finally, the output values are transformed into a range between 0 and 1 using a sigmoid layer, which represents the probability of the ICD code being related to the summary. The loss function used for the HAN model is the cross-entropy loss defined in Equation (1), which measures the sum of the negative log-likelihood of the probabilities of the actual labels. A large deviation between the predicted probability and the actual label causes a greater value in the loss and thus is penalized more during training.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Architecture of the hierarchical attention network implementation in this study. ICD-10-CM: International Classification of Diseases, 10th Revision, Clinical Modification.</p>
          </caption>
          <graphic xlink:href="jmir_v26i1e58278_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <disp-formula>
          <graphic xlink:href="jmir_v26i1e58278_fig6.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </disp-formula>
        <p>For inference, a threshold is set to assign the main code and other codes to the document based on the estimated label probability set following equations 2 and 3.</p>
        <disp-formula>argmax(p) <bold>(2)</bold></disp-formula>
        <disp-formula>
          <graphic xlink:href="jmir_v26i1e58278_fig7.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </disp-formula>
        <p>As shown in the aforementioned equations, the threshold <italic>t</italic> is applied to determine the predicted probability <inline-graphic xlink:href="jmir_v26i1e58278_fig8.png" xlink:type="simple" mimetype="image"/>
 for <italic>ICD-10-CM</italic> codes, with the code having the highest probability selected as the <italic>ICD-10-CM</italic> main code. The main code is subsequently used as the principal diagnosis in the Tw-DRG process. After conducting multiple experiments, we set the threshold <italic>t</italic> in equation (3) as 0.5 as it has been determined that this setting achieves a better balance between the precision and recall.</p>
        <p>In our GPT-2 implementation, we used a pretrained model developed by Papanikolaou and Pierleoni [<xref ref-type="bibr" rid="ref28">28</xref>], which was a model based on the GPT-2 architecture fine-tuned using 0.5 million PubMed abstracts with 355 million parameters. This model was adopted instead of the original GPT-2 released by OpenAI to address the issue of the variety of medical vocabularies conveyed in the summaries, which include diverse clinical data such as symptoms, diseases, and medications written in varying styles. To further fine-tune the pretrained model on our data set, we added special tokens to the model’s tokenizer to help the model understand the structure of the sequence to complete the coding tasks. A total of 3 special tokens including “CLS,” “SEP,” and “#@#” were defined, which represent the starting position symbol of the input, the ending position symbol of the sentence, and the separator between <italic>ICD-10-CM</italic> main code (MAINCODE) and other codes (OTHERCODE), respectively. An example of a fine-tuned instance after processing is shown in <xref rid="figure2" ref-type="fig">Figure 2</xref>. The coding task is then formulated in a generative manner that is given an input text (<italic>eg</italic>, the prompt part shown in <xref rid="figure2" ref-type="fig">Figure 2</xref>) with a sequence of n tokens x = [x<sub>1</sub>,x<sub>2</sub>,...,x<sub>n</sub>], and the target output sequence y = [y<sub>1</sub>,y<sub>2</sub>,...,y<sub>m</sub>]. The objective is to maximize the conditional probability in the auto-regressive formulation represented in equation (4).</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>An example of fine-tuned data for GPT-2.</p>
          </caption>
          <graphic xlink:href="jmir_v26i1e58278_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <disp-formula>
          <graphic xlink:href="jmir_v26i1e58278_fig9.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </disp-formula>
        <p>During inference, the probability of each generated code is estimated by averaging the accumulated conditional probabilities for the tokens of the generated code. In instances where discharge summaries exceed the 1024 token limit of the GPT-2 model, we truncate the text during preprocessing to ensure it stays within the specified limit.</p>
        <p>Finally, in addition to HAN and GPT-2, we implemented 2 baselines for performance comparison, one is a BERT-based model proposed by Devlin et al [<xref ref-type="bibr" rid="ref29">29</xref>] and the other is based on the bidirectional gated recurrent unit along with the BERT-based word representation architecture proposed by Chen et al [<xref ref-type="bibr" rid="ref30">30</xref>].</p>
      </sec>
      <sec>
        <title>Integration of the Developed NLP-Driven AI-Assisted Coding System at KMUCHH</title>
        <p>To integrate the developed models within the workflow of the CCS at KMUCHH, a user interface as illustrated in <xref rid="figure3" ref-type="fig">Figure 3</xref> was implemented [<xref ref-type="bibr" rid="ref31">31</xref>]. Various components can be identified within this interface. Area (1) serves as the patient EMRs display area. It conveniently showcases the relevant unstructured text content from the EMRs with 14 selectable sections. Area (2) features 3 functional buttons from left to right. The first button is used for attention visualization as presented in <xref rid="figure4" ref-type="fig">Figure 4</xref>. To display this figure, we analyze the results of the attention layer of the deployed model to highlight key medical terms from discharge summaries.</p>
        <p>The second button initiates the <italic>ICD-10-CM</italic> automatic coding, which exploits the developed model to generate <italic>ICD-10-CM</italic> codes based on the content shown in area (1). The third button is the save button, which is used to store the results of <italic>ICD-10-CM</italic> autocoding, user selections, and system operation time stamps in the database. The search bar is located in area (3) and can be used by the CCS to manually look up <italic>ICD-10-CM</italic> codes or keywords in English to assist in manually adding missing codes. Any newly added codes are combined with the selected codes and displayed in area (4). Finally, area (5) provides a list of <italic>ICD-10-CM</italic> codes recommended by the deployed model through the coding assistant process. These results are presented in a checklist format as illustrated in <xref rid="figure3" ref-type="fig">Figure 3</xref>, including <italic>ICD-10-CM</italic> codes, their corresponding Chinese and English descriptions, and the confidence assigned by the deep learning model.</p>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>User interface of a natural language processing–driven artificial intelligence–assisted coding system at Kaohsiung Medical University Chung-Ho Memorial Hospital.</p>
          </caption>
          <graphic xlink:href="jmir_v26i1e58278_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure4" position="float">
          <label>Figure 4</label>
          <caption>
            <p>Visualization of the attentive key terms highlighted in a given discharge summary by the attention mechanism.</p>
          </caption>
          <graphic xlink:href="jmir_v26i1e58278_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>The user interface and coding system were deployed and tested in January 2023 at KMUCHH. An overview of the integrated workflow is displayed in <xref rid="figure5" ref-type="fig">Figure 5</xref>. First, the CCS uses the user interface developed for the AI-assisted coding system to send requests to the hospital information system. Once the hospital information system accepts the request, the requested EMRs are transferred to the structured query language server database specifically developed for the study. Subsequently, a notification is dispatched to the user interface. Upon receiving the notification, the user interface retrieves the corresponding medical record from the database and uses the developed model to generate suggested <italic>ICD-10-CM</italic> codes. The recommendations are then presented in area (5) of the user interface as shown in <xref rid="figure3" ref-type="fig">Figure 3</xref>. Finally, the CCS reviews the recommendations and selects the final <italic>ICD-10-CM</italic> codes in area (4) of <xref rid="figure3" ref-type="fig">Figure 3</xref>, which are subsequently saved in the database.</p>
        <fig id="figure5" position="float">
          <label>Figure 5</label>
          <caption>
            <p>Workflow of the natural language processing–driven artificial intelligence–assisted coding system deployed in Kaohsiung Medical University Chung-Ho Memorial Hospital. ICD-10-CM: International Classification of Diseases, 10th Revision, Clinical Modification; SQL: structured query language.</p>
          </caption>
          <graphic xlink:href="jmir_v26i1e58278_fig5.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Tw-DRG Payment System</title>
        <p>Figure S1 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> depicts the current implementation practice of the Tw-DRG payment system. The initial step of this process involves the conversion of diagnoses from physician-provided discharge summaries into <italic>ICD-10-CM</italic> codes and follows the principal diagnosis selection principle defined by NHIA to use the main cause of hospitalization as the principal diagnosis. In cases where multiple principal diagnoses are identified simultaneously, the one with the highest medical cost will be selected. Our NLP-driven AI-assisted coding system plays a pivotal role in the first and second steps of the process, automating the <italic>ICD-10-CM</italic> coding process based on discharge summaries and determining the principal diagnosis.</p>
        <p>As listed in <xref ref-type="table" rid="table2">Table 2</xref>, the cause of a patient’s hospitalization identified by our system as the principal diagnosis is subsequently categorized into the corresponding MDC under the Tw-DRG calculation software program. The MDC schematic classification for inpatient cases consists of 26 distinct categories, spanning from pre-MDC to MDC 1 through MDC 25. Once the principal diagnosis is determined for a patient, similar therapeutic diseases or procedures are further divided into multiple DRGs. This division considers various factors including the patient’s age and gender, the presence of comorbidities or complications (secondary diagnoses), discharge status, etc. The prospective inpatient costs that the NHIA should reimburse hospitals are calculated by leveraging historical data from the health care industry as a foundational reference. In sum, the DRG provides essential information for this specific hospitalization, including details related to health insurance reimbursement, relative weight, and the presence of comorbidities and complications.</p>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>The 26 major diagnostic categories (MDC) in Taiwan diagnosis related groups.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="250"/>
            <col width="750"/>
            <thead>
              <tr valign="top">
                <td>MDC</td>
                <td>Title</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Pre</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Heart Transplant</p>
                    </list-item>
                    <list-item>
                      <p>Liver Transplant</p>
                    </list-item>
                    <list-item>
                      <p>Bone Marrow Transplant</p>
                    </list-item>
                    <list-item>
                      <p>Tracheostomy</p>
                    </list-item>
                    <list-item>
                      <p>Lung Transplant</p>
                    </list-item>
                    <list-item>
                      <p>Pancreatic Transplant</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>1</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Diseases and Disorders of the Nervous System</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>2</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Diseases and Disorders of the Eye</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>3</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Diseases and Disorders of the Ear, Nose, Mouth and Throat</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>4</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Diseases and Disorders of the Respiratory System</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>5</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Diseases and Disorders of the Circulatory System</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>6</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Diseases and Disorders of the Digestive System</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>7</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Diseases and Disorders of the Hepatobiliary System and Pancreas</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>8</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Diseases and Disorders of the Musculoskeletal System and Connective Tissue</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>9</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Diseases and Disorders of the Skin, Subcutaneous Tissue and Breast</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>10</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Endocrine, Nutritional and Metabolic Diseases and Disorders</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>11</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Diseases and Disorders of the Kidney and Urinary Tract</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>12</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Diseases and Disorders of the Male Reproductive System</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>13</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Diseases and Disorders of the Female Reproductive System</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>14</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Pregnancy, Childbirth and the Puerperium</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>15</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Newborns and Other Neonates with Conditions Originating in the Perinatal Period</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>16</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Diseases and Disorders of the Blood and Blood Forming Organs and Immunological Disorders</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>17</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Myeloproliferative Diseases and Disorders, and Poorly Differentiated Neoplasm</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>18</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Infectious and Parasitic Diseases (Systemic or Unspecified Sites)</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>19</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Mental Diseases and Disorders</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>20</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Alcohol/Drug Use and Alcohol/Drug Induced Organic Mental Disorders</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>21</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Injuries, Poisonings and Toxic Effects of Drugs</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>22</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Burns</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>23</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Factors Influencing Health Status and Other Contacts with Health Services</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>24</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Multiple Significant Trauma</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>25</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Human Immunodeficiency Virus Infection</p>
                    </list-item>
                  </list>
                </td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
      <sec>
        <title>Experiment Configurations</title>
        <p>Both deep learning models were implemented using CUDA 12.0 (Nvidia Corp) and the PyTorch (Meta Platforms) libraries trained on machines equipped with an Intel i7-13700 processor (Intel Corporation), 64 GB of RAM, and an NVIDIA GeForce RTX 4090 24-GB graphics card (Nvidia Corp). During the training of these 2 models, different hyper-parameter configurations were used. For HAN, we set the batch size to 32, the learning rate to 1e-3, and the number of epochs to 500, and used the Adam optimizer [<xref ref-type="bibr" rid="ref32">32</xref>] for optimization. During the training process, we implemented an early stopping strategy with a patience value of 50, which was triggered if there was no improvement in the <italic>F</italic><sub>1</sub>-score or loss, or if the loss on the validation set reached 0. For GPT-2, we configured the learning rate to be 1.5e-4, set the number of epochs to 10, and set the batch size to 4 to prevent out-of-memory issues. We used the AdamW optimizer [<xref ref-type="bibr" rid="ref33">33</xref>] for parameter optimization, setting the epsilon to 1.0e-09 to ensure that the model stops updating when the learning rate drops below this threshold. In addition, to align with the maximum input length in the GPT-2 architecture, we set the maximum input length for each training instance to 1024.</p>
        <p>The evaluation metric used to assess the performance of the developed models is the <italic>F</italic>-measure, calculated as the harmonic mean of precision and recall. Precision and recall are computed based on the counts of correctly predicted <italic>ICD-10-CM</italic> codes (true positives), incorrectly predicted <italic>ICD-10-CM</italic> codes (false positives), and undetected <italic>ICD-10-CM</italic> codes (false negatives). These values are determined by comparing the model’s predictions with the ICD codes assigned by the CCS. Precision, recall, and the <italic>F</italic><sub>1</sub>-score are calculated using equations 5, 6, and 7, respectively.</p>
        <disp-formula>
          <graphic xlink:href="jmir_v26i1e58278_fig10.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </disp-formula>
        <disp-formula>
          <graphic xlink:href="jmir_v26i1e58278_fig11.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </disp-formula>
        <disp-formula>
          <graphic xlink:href="jmir_v26i1e58278_fig12.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </disp-formula>
        <p>It is worth noting that in the evaluation step, in addition to assessing the performance of the autocoding systems on the original compiled corpus, we also compiled a new Tw-DRGs data set consisting of EMRs collected from the real KMUCHH environment. The new data set was processed by the aforementioned autocoding systems in series with NHIA’s DRG calculation software. In addition, to assess the reliability of our NLP-driven AI-assisted coding system in the Tw-DRG inpatient payment process, we used the κ measure as an indicator of agreement for MDC between our system and CCS curated results. Specifically, we used the Cohen κ [<xref ref-type="bibr" rid="ref34">34</xref>] to estimate the κ values. For this comparison, we used the Tw-DRG calculation program provided by the NHIA in Taiwan. This program uses inputs, such as the predicted main code, other codes, gender, and date of birth to perform MDC classification and DRG grouping. We developed a program to analyze the output from the Tw-DRG calculation program for the comparison study.</p>
      </sec>
      <sec>
        <title>Ethical Considerations</title>
        <p>This study was approved by the KMUCHH Institutional Review Board (protocol title: “Developing Artificial Intelligence model to support <italic>ICD-10-CM</italic> and PCS coding and comparing the performance between machine coding and manual coding”; institutional review board number: KMUHIRB-E(II)-20230214). As the study utilized secondary data collected under the original institutional review board approved protocol, no additional informed consent was required. All data collected from KMUCHH was aggregated for research purposes adhering to fair use principles. Participant privacy was rigorously protected through anonymization, and stringent confidentiality measures were implemented. Given that the study involved only the analysis of existing, deidentified data, there was no direct interaction with participants, and thus, no compensation was provided. Additionally, the study strictly adhered to all applicable local, national, regional, and international laws and regulations concerning the protection of personal information, privacy, and human rights.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Overview</title>
        <p>In the following subsections, we demonstrate the effectiveness of deploying NLP-driven AI-assisted coding systems, specifically using HAN and GPT-2 models, in the context of <italic>ICD-10-CM</italic> coding and the Tw-DRGs process. We provide a comparative analysis of the AI-assisted coding results with CCS curated results for Tw-DRGs using the data set gathered from the deployment of the developed AI-assisted coding system in the hospital environment. This analysis validates the potential of NLP-driven AI-assisted coding in facilitating the Tw-DRGs process, highlighting its effectiveness in improving coding.</p>
      </sec>
      <sec>
        <title>Performance Comparison of the Developed Deep Learning Models on the ICD-10-CM Coding Task</title>
        <p><xref ref-type="table" rid="table3">Tables 3</xref> and <xref ref-type="table" rid="table4">4</xref> offer a comprehensive analysis of the performance of the baseline models as well as the HAN and GPT-2 models in <italic>ICD-10-CM</italic> autocoding on the test set. In addition to the full code results, <xref ref-type="table" rid="table3">Table 3</xref> shows the performance of each model on the top-50 codes (Top-50 F), which is often reported in previous research papers. The list of the top-50 <italic>ICD-10-CM</italic> codes can be found in <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>. The results presented in <xref ref-type="table" rid="table3">Table 3</xref> highlight the superior performance of the GPT-2 model compared to the other models. GPT-2 exhibited higher precision, recall, and <italic>F</italic>-measure in its overall performance compared to HAN, with improvements of 0.134, 0.077, and 0.107, respectively. Additionally, the boost in the <italic>F</italic><sub>1</sub>-score for the main code further emphasizes the advantage of applying the GPT-2 model over the other models. Both HAN and GPT-2 models achieved satisfactory top-50 <italic>F</italic><sub>1</sub>-scores but obtained significantly lower <italic>F</italic><sub>1</sub>-scores for the full codes owing to the long-tailed distribution of the <italic>ICD-10-CM</italic> codes assigned by the CCS.</p>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Overall performance evaluation of hierarchical attention network (HAN) and GPT-2 models in the International Classification of Diseases, 10th Revision, Clinical Modification, autocoding on the test set.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="150"/>
            <col width="160"/>
            <col width="120"/>
            <col width="180"/>
            <col width="180"/>
            <col width="210"/>
            <thead>
              <tr valign="top">
                <td>Model</td>
                <td>Precision</td>
                <td>Recall</td>
                <td><italic>F</italic>-measure</td>
                <td>Top-50 (F)</td>
                <td>Main code (F)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>HAN</td>
                <td>0.529</td>
                <td>0.594</td>
                <td>0.560</td>
                <td>0.818</td>
                <td>0.429</td>
              </tr>
              <tr valign="top">
                <td>GPT-2</td>
                <td>0.663</td>
                <td>
                  <italic>0.671</italic>
                  <sup>
                    <italic>a</italic>
                  </sup>
                </td>
                <td>
                  <italic>0.667</italic>
                  <sup>
                    <italic>a</italic>
                  </sup>
                </td>
                <td>
                  <italic>0.851</italic>
                  <sup>
                    <italic>a</italic>
                  </sup>
                </td>
                <td>
                  <italic>0.575</italic>
                  <sup>
                    <italic>a</italic>
                  </sup>
                </td>
              </tr>
              <tr valign="top">
                <td>BiGRU<sup>b</sup></td>
                <td>
                  <italic>0.765</italic>
                  <sup>
                    <italic>a</italic>
                  </sup>
                </td>
                <td>0.451</td>
                <td>0.567</td>
                <td>0.717</td>
                <td>0.250</td>
              </tr>
              <tr valign="top">
                <td>BERT<sup>c</sup></td>
                <td>0.759</td>
                <td>0.366</td>
                <td>0.494</td>
                <td>0.698</td>
                <td>0.215</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table3fn1">
              <p><sup>a</sup>The best value for each performance metric is italicized.</p>
            </fn>
            <fn id="table3fn2">
              <p><sup>b</sup>BiGRU: bidirectional gated recurrent unit.</p>
            </fn>
            <fn id="table3fn3">
              <p><sup>c</sup>BERT: bidirectional encoder representations from transformers.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <p><xref ref-type="table" rid="table4">Table 4</xref> provides a more detailed breakdown of the <italic>ICD-10-CM</italic> subcategory performance for the 2 models. The results underscore the consistent superiority of GPT-2 over HAN across all categories, particularly in situations with limited data (&#60;10%). Notably, GPT-2 acquired better <italic>F</italic><sub>1</sub>-scores in the T, H, and O categories compared to HAN with an increase of 0.387, 0.279, and 0.269, respectively. This accentuates the advantage of GPT-2 due to its pretrained nature, which enables it to extract crucial features in a more efficient manner compared to HAN when only limited training data are provided.</p>
        <p>By contrast, both models revealed low performance in the X and Y categories. These 2 categories suffered from a lack of data, as they have the lowest number of training instances as outlined in the last column of <xref ref-type="table" rid="table4">Table 4</xref>. The restricted amount of training data poses a challenge to the models in achieving better performances in these specific categories.</p>
        <table-wrap position="float" id="table4">
          <label>Table 4</label>
          <caption>
            <p>The International Classification of Diseases, 10th Revision, Clinical Modification, category-specific performance comparison of the hierarchical attention network (HAN) and GPT-2 on the test set.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="190"/>
            <col width="130"/>
            <col width="110"/>
            <col width="120"/>
            <col width="450"/>
            <thead>
              <tr valign="top">
                <td>Categories (block)</td>
                <td>Chapters</td>
                <td colspan="2">Models (<italic>F</italic>-measure)</td>
                <td>Discharge summaries in the training set (n=105,101), n (%)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>HAN</td>
                <td>GPT-2</td>
                <td>
                  <break/>
                </td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>A</td>
                <td>I</td>
                <td>0.851</td>
                <td>
                  <italic>0.955</italic>
                  <sup>a</sup>
                </td>
                <td>15,267 (14.53)</td>
              </tr>
              <tr valign="top">
                <td>B</td>
                <td>I</td>
                <td>0.703</td>
                <td>
                  <italic>0.889</italic>
                  <sup>a</sup>
                </td>
                <td>20,441 (19.45)</td>
              </tr>
              <tr valign="top">
                <td>C</td>
                <td>II</td>
                <td>0.812</td>
                <td>
                  <italic>0.890</italic>
                  <sup>a</sup>
                </td>
                <td>71,202 (67.75)</td>
              </tr>
              <tr valign="top">
                <td>D</td>
                <td>II, III</td>
                <td>0.702</td>
                <td>
                  <italic>0.814</italic>
                  <sup>a</sup>
                </td>
                <td>28,393 (27.01)</td>
              </tr>
              <tr valign="top">
                <td>E</td>
                <td>IV</td>
                <td>0.797</td>
                <td>
                  <italic>0.888</italic>
                  <sup>a</sup>
                </td>
                <td>63,545 (60.46)</td>
              </tr>
              <tr valign="top">
                <td>F</td>
                <td>V</td>
                <td>0.611</td>
                <td>
                  <italic>0.833</italic>
                  <sup>a</sup>
                </td>
                <td>8835 (8.41)</td>
              </tr>
              <tr valign="top">
                <td>G</td>
                <td>VI</td>
                <td>0.566</td>
                <td>
                  <italic>0.794</italic>
                  <sup>a</sup>
                </td>
                <td>13,236 (12.59)</td>
              </tr>
              <tr valign="top">
                <td>H</td>
                <td>VII, VIII</td>
                <td>0.576</td>
                <td>
                  <italic>0.855</italic>
                  <sup>a</sup>
                </td>
                <td>9824 (9.35)</td>
              </tr>
              <tr valign="top">
                <td>I</td>
                <td>IX</td>
                <td>0.781</td>
                <td>
                  <italic>0.833</italic>
                  <sup>a</sup>
                </td>
                <td>82,005 (78.02)</td>
              </tr>
              <tr valign="top">
                <td>J</td>
                <td>X</td>
                <td>0.756</td>
                <td>
                  <italic>0.880</italic>
                  <sup>a</sup>
                </td>
                <td>31,233 (29.72)</td>
              </tr>
              <tr valign="top">
                <td>K</td>
                <td>XI</td>
                <td>0.714</td>
                <td>
                  <italic>0.843</italic>
                  <sup>a</sup>
                </td>
                <td>48,827 (46.46)</td>
              </tr>
              <tr valign="top">
                <td>L</td>
                <td>XII</td>
                <td>0.496</td>
                <td>
                  <italic>0.730</italic>
                  <sup>a</sup>
                </td>
                <td>7369 (7.01)</td>
              </tr>
              <tr valign="top">
                <td>M</td>
                <td>XIII</td>
                <td>0.549</td>
                <td>
                  <italic>0.764</italic>
                  <sup>a</sup>
                </td>
                <td>20,312 (19.33)</td>
              </tr>
              <tr valign="top">
                <td>N</td>
                <td>XIV</td>
                <td>0.783</td>
                <td>
                  <italic>0.894</italic>
                  <sup>a</sup>
                </td>
                <td>39,504 (37.59)</td>
              </tr>
              <tr valign="top">
                <td>O</td>
                <td>XV</td>
                <td>0.583</td>
                <td>
                  <italic>0.852</italic>
                  <sup>a</sup>
                </td>
                <td>3887 (3.7)</td>
              </tr>
              <tr valign="top">
                <td>P</td>
                <td>XVI</td>
                <td>0.738</td>
                <td>
                  <italic>0.857</italic>
                  <sup>a</sup>
                </td>
                <td>4297 (4.09)</td>
              </tr>
              <tr valign="top">
                <td>Q</td>
                <td>XVII</td>
                <td>0.571</td>
                <td>
                  <italic>0.652</italic>
                  <sup>a</sup>
                </td>
                <td>2918 (2.78)</td>
              </tr>
              <tr valign="top">
                <td>R</td>
                <td>XVIII</td>
                <td>0.545</td>
                <td>
                  <italic>0.617</italic>
                  <sup>a</sup>
                </td>
                <td>24,544 (23.35)</td>
              </tr>
              <tr valign="top">
                <td>S</td>
                <td>XIX</td>
                <td>0.661</td>
                <td>
                  <italic>0.742</italic>
                  <sup>a</sup>
                </td>
                <td>20,850 (19.84)</td>
              </tr>
              <tr valign="top">
                <td>T</td>
                <td>XIX</td>
                <td>0.297</td>
                <td>
                  <italic>0.684</italic>
                  <sup>a</sup>
                </td>
                <td>8505 (8.09)</td>
              </tr>
              <tr valign="top">
                <td>V</td>
                <td>XX</td>
                <td>0.446</td>
                <td>
                  <italic>0.584</italic>
                  <sup>a</sup>
                </td>
                <td>3613 (3.44)</td>
              </tr>
              <tr valign="top">
                <td>W</td>
                <td>XX</td>
                <td>0.459</td>
                <td>
                  <italic>0.619</italic>
                  <sup>a</sup>
                </td>
                <td>3558 (3.39)</td>
              </tr>
              <tr valign="top">
                <td>X</td>
                <td>XX</td>
                <td>0.270</td>
                <td>
                  <italic>0.444</italic>
                  <sup>a</sup>
                </td>
                <td>1968 (1.87)</td>
              </tr>
              <tr valign="top">
                <td>Y</td>
                <td>XX</td>
                <td>0.425</td>
                <td>
                  <italic>0.458</italic>
                  <sup>a</sup>
                </td>
                <td>2652 (2.52)</td>
              </tr>
              <tr valign="top">
                <td>Z</td>
                <td>XXI</td>
                <td>0.657</td>
                <td>
                  <italic>0.747</italic>
                  <sup>a</sup>
                </td>
                <td>39,577 (37.66)</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table4fn1">
              <p><sup>a</sup>The best value for each performance metric is italicized.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Comparative Analysis of NLP-Driven AI-Assisted Coding Results With CCS for Tw-DRGs in the Real Hospital Environment</title>
        <p>In this subsection, we provide a comparative analysis of the performance of the deployed systems from the perspective of Tw-DRGs. The performance of Tw-DRGs was estimated on the additional data set of 2632 discharge cases stored in the SQL server database indicated in <xref rid="figure5" ref-type="fig">Figure 5</xref>, which was collected from February 2023 to April 2023.</p>
        <p>As described in the previous section on the Tw-DRGs payment system, the principal diagnosis is the crucial factor in determining the MDC, while secondary diagnoses are only used to determine different Tw-DRGs distributions within the same MDC. The principal diagnosis is determined based on the reason for the patient’s hospitalization and only a single disease can be selected. If there are multiple diseases for which the patient is receiving treatment upon admission, selecting any one of them as the principal diagnosis is not considered an error but may affect the results of Tw-DRGs. Hence, the principal diagnosis selection cannot be assessed by solely comparing the system output against the <italic>ICD-10-CM</italic> main codes initially assigned by the CCS. Instead, it necessitates the expertise of the CCS to reevaluate both the EMRs and the codes proposed by the developed systems. Therefore, to provide a comparative analysis of the 2 developed models in terms of their performance in defining the principal diagnosis as shown in <xref ref-type="table" rid="table5">Table 5</xref>, the senior CCS, author ATL, who is the third author of the paper, examined the discharged cases to determine the accuracy of principal diagnoses suggested by the AI-assisted coding systems based on the HAN and GPT-2 models.</p>
        <table-wrap position="float" id="table5">
          <label>Table 5</label>
          <caption>
            <p>Comparison of performance between the deep learning models and certified coding specialist–curated results for principal diagnosis in discharge cases.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="340"/>
            <col width="0"/>
            <col width="230"/>
            <col width="0"/>
            <col width="200"/>
            <col width="0"/>
            <col width="200"/>
            <thead>
              <tr valign="top">
                <td colspan="3">Models and conditions</td>
                <td colspan="2">February 2023 (n=748), n (%)</td>
                <td colspan="2">March 2023 (n=991), n (%)</td>
                <td>April 2023 (n=893), n (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="8">
                  <bold>HAN<sup>a</sup> (<italic>F</italic>-measure=0.524)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>All correct<sup>b</sup></td>
                <td colspan="2">79 (10.6)</td>
                <td colspan="2">181 (18.3)</td>
                <td colspan="2">177 (19.8)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Principal diagnosis with incorrect secondary diagnoses<sup>c</sup></td>
                <td colspan="2">462 (61.8)</td>
                <td colspan="2">477 (48.1)</td>
                <td colspan="2">369 (41.3)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>No principal diagnosis<sup>d</sup></td>
                <td colspan="2">181 (24.2)</td>
                <td colspan="2">277 (27)</td>
                <td colspan="2">285 (31.9)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>All incorrect<sup>e</sup></td>
                <td colspan="2">26 (3.5)</td>
                <td colspan="2">56 (5.5)</td>
                <td colspan="2">62 (6.9)</td>
              </tr>
              <tr valign="top">
                <td colspan="8">
                  <bold>GPT-2 (<italic>F</italic>-measure=0.621)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>All correct<sup>b</sup></td>
                <td colspan="2">130 (17.4)</td>
                <td colspan="2">161 (16.2)</td>
                <td colspan="2">144 (16.1)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Principal diagnosis with incorrect secondary diagnoses<sup>c</sup></td>
                <td colspan="2">443 (59.2)</td>
                <td colspan="2">599 (60.4)</td>
                <td colspan="2">524 (58.7)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>No principal diagnosis<sup>d</sup></td>
                <td colspan="2">165 (22.1)</td>
                <td colspan="2">186 (18.8)</td>
                <td colspan="2">183 (20.5)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>All incorrect<sup>e</sup></td>
                <td colspan="2">24 (3.2)</td>
                <td colspan="2">29 (2.9)</td>
                <td colspan="2">43 (4.8)</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table5fn1">
              <p><sup>a</sup>HAN: hierarchical attention network.</p>
            </fn>
            <fn id="table5fn2">
              <p><sup>b</sup>Autocoding results were identical to certified coding specialists.</p>
            </fn>
            <fn id="table5fn3">
              <p><sup>c</sup>Autocoding system correctly identified the principal diagnosis, but discrepancies exist in ≥1 secondary diagnosis codes assigned by certified coding specialists.</p>
            </fn>
            <fn id="table5fn4">
              <p><sup>d</sup>Autocoding system’s principal diagnosis was different from certified coding specialists.</p>
            </fn>
            <fn id="table5fn5">
              <p><sup>e</sup>Autocoding results were entirely different from certified coding specialists.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <p>The first column of <xref ref-type="table" rid="table5">Table 5</xref> shows the <italic>F</italic>-measure estimates for HAN (0.524) and GPT-2 (0.621) in their suggestions for main codes in comparison with the CCS coding results. The reported <italic>F</italic>-measures align with our observation on the test set and validate GPT-2 as a more reliable model. The manual reassessment of the results performed by the senior CCS according to the definition of principal diagnosis is displayed in the second to fourth columns of the table. Notably, most (1308/2632, 49.7%) of the cases of the 2 deep learning models when reviewed against the manual coding of the CCS fall under the category “principal diagnosis with incorrect secondary diagnoses,” followed by the category “no principal diagnosis.” The “all incorrect” cases had the lowest proportion among all categories. Predictions of GPT-2 achieved a better “all correct” category coverage and once again demonstrated superiority over HAN in facilitating the ICD coding task. The overall correct rates for principal diagnosis for HAN and GPT-2, which take into account both the “all correct” and “principal diagnosis with incorrect secondary diagnoses” categories, were 0.663 and 0.7602, respectively.</p>
        <p>In <xref ref-type="table" rid="table6">Table 6</xref>, we extended the comparison to assess the agreement of MDC between the 2 developed models and the CCS. The second, third, and fourth columns of <xref ref-type="table" rid="table6">Table 6</xref> show the estimated MDC results for the codes assigned by the CCS, HAN, and GPT-2, respectively. The last 2 columns show the estimated κ values corresponding to the 2 models against the CCS.</p>
        <table-wrap position="float" id="table6">
          <label>Table 6</label>
          <caption>
            <p>Comparison of the κ values between 2 deep learning models and a senior certified coding specialist (CCS) for major diagnostic category (MDC) in discharge cases spanning from February 2023 to April 2023 at Kaohsiung Medical University Chung-Ho Memorial Hospital.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="90"/>
            <col width="220"/>
            <col width="180"/>
            <col width="180"/>
            <col width="0"/>
            <col width="160"/>
            <col width="170"/>
            <thead>
              <tr valign="bottom">
                <td>MDC</td>
                <td>CCS</td>
                <td colspan="3">Model</td>
                <td colspan="2">κ value<sup>a</sup></td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Manual coding (n=2632), n (%)</td>
                <td>HAN<sup>b</sup> (n=2632), n (%)</td>
                <td>GPT-2 (n=2632), n (%)</td>
                <td colspan="2">HAN (average 0.592)</td>
                <td>GPT-2 (average 0.714)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Pre</td>
                <td>0 (0)</td>
                <td>0 (0)</td>
                <td>0 (0)</td>
                <td colspan="2">—<sup>c</sup></td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>1</td>
                <td>509 (19.34)</td>
                <td>280 (10.64)</td>
                <td>497 (18.88)</td>
                <td colspan="2">0.670<sup>d</sup></td>
                <td>0.803<sup>d</sup></td>
              </tr>
              <tr valign="top">
                <td>2</td>
                <td>38 (1.44)</td>
                <td>9 (0.34)</td>
                <td>28 (1.06)</td>
                <td colspan="2">0.300</td>
                <td>0.724<sup>d</sup></td>
              </tr>
              <tr valign="top">
                <td>3</td>
                <td>132 (5.02)</td>
                <td>113 (4.29)</td>
                <td>124 (4.71)</td>
                <td colspan="2">0.689<sup>d</sup></td>
                <td>
                  <italic>0.852</italic>
                  <sup>e</sup>
                </td>
              </tr>
              <tr valign="top">
                <td>4</td>
                <td>302 (11.47)</td>
                <td>309 (11.74)</td>
                <td>287 (10.90)</td>
                <td colspan="2">
                  <italic>0.845</italic>
                  <sup>e</sup>
                </td>
                <td>
                  <italic>0.826</italic>
                  <sup>e</sup>
                </td>
              </tr>
              <tr valign="top">
                <td>5</td>
                <td>184 (6.99)</td>
                <td>310 (11.78)</td>
                <td>213 (8.09)</td>
                <td colspan="2">0.607</td>
                <td>0.786<sup>d</sup></td>
              </tr>
              <tr valign="top">
                <td>6</td>
                <td>229 (8.7)</td>
                <td>217 (8.24)</td>
                <td>222 (8.43)</td>
                <td colspan="2">0.775<sup>d</sup></td>
                <td>
                  <italic>0.818</italic>
                  <sup>e</sup>
                </td>
              </tr>
              <tr valign="top">
                <td>7</td>
                <td>84 (3.19)</td>
                <td>90 (3.42)</td>
                <td>77 (2.93)</td>
                <td colspan="2">0.710<sup>d</sup></td>
                <td>
                  <italic>0.827</italic>
                  <sup>e</sup>
                </td>
              </tr>
              <tr valign="top">
                <td>8</td>
                <td>87 (3.31)</td>
                <td>66 (2.51)</td>
                <td>87 (3.31)</td>
                <td colspan="2">0.576</td>
                <td>0.786<sup>d</sup></td>
              </tr>
              <tr valign="top">
                <td>9</td>
                <td>116 (4.41)</td>
                <td>83 (3.15)</td>
                <td>95 (3.61)</td>
                <td colspan="2">0.692<sup>d</sup></td>
                <td>0.729<sup>d</sup></td>
              </tr>
              <tr valign="top">
                <td>10</td>
                <td>132 (5.02)</td>
                <td>237 (9.00)</td>
                <td>167 (6.34)</td>
                <td colspan="2">0.505</td>
                <td>0.642<sup>d</sup></td>
              </tr>
              <tr valign="top">
                <td>11</td>
                <td>120 (4.56)</td>
                <td>205 (7.79)</td>
                <td>99 (3.76)</td>
                <td colspan="2">0.648<sup>d</sup></td>
                <td>0.738<sup>d</sup></td>
              </tr>
              <tr valign="top">
                <td>12</td>
                <td>3 (0.11)</td>
                <td>7 (0.27)</td>
                <td>3 (0.11)</td>
                <td colspan="2">0.362</td>
                <td>0.666<sup>d</sup></td>
              </tr>
              <tr valign="top">
                <td>13</td>
                <td>8 (0.30)</td>
                <td>8 (0.30)</td>
                <td>10 (0.38)</td>
                <td colspan="2">0.749<sup>d</sup></td>
                <td>
                  <italic>0.889</italic>
                  <sup>e</sup>
                </td>
              </tr>
              <tr valign="top">
                <td>14</td>
                <td>41 (1.56)</td>
                <td>11 (0.42)</td>
                <td>38 (1.44)</td>
                <td colspan="2">0.419</td>
                <td>
                  <italic>1.000</italic>
                  <sup>e</sup>
                </td>
              </tr>
              <tr valign="top">
                <td>15</td>
                <td>15 (0.57)</td>
                <td>9 (0.34)</td>
                <td>16 (0.61)</td>
                <td colspan="2">0.635<sup>d</sup></td>
                <td>0.708<sup>d</sup></td>
              </tr>
              <tr valign="top">
                <td>16</td>
                <td>57 (2.17)</td>
                <td>64 (2.43)</td>
                <td>44 (1.67)</td>
                <td colspan="2">0.624<sup>d</sup></td>
                <td>0.728<sup>d</sup></td>
              </tr>
              <tr valign="top">
                <td>17</td>
                <td>2 (0.08)</td>
                <td>3 (0.11)</td>
                <td>1 (0.04)</td>
                <td colspan="2">0.399</td>
                <td>0.666<sup>d</sup></td>
              </tr>
              <tr valign="top">
                <td>18</td>
                <td>505 (19.19)</td>
                <td>465 (17.67)</td>
                <td>483 (18.35)</td>
                <td colspan="2">
                  <italic>0.870</italic>
                  <sup>e</sup>
                </td>
                <td>0.777<sup>d</sup></td>
              </tr>
              <tr valign="top">
                <td>19</td>
                <td>0 (0)</td>
                <td>23 (0.87)</td>
                <td>3 (0.11)</td>
                <td colspan="2">—</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>20</td>
                <td>0 (0)</td>
                <td>2 (0.08)</td>
                <td>2 (0.08)</td>
                <td colspan="2">—</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>21</td>
                <td>21 (0.80)</td>
                <td>24 (0.91)</td>
                <td>16 (0.61)</td>
                <td colspan="2">0.404</td>
                <td>0.646<sup>d</sup></td>
              </tr>
              <tr valign="top">
                <td>22</td>
                <td>1 (0.04)</td>
                <td>0 (0)</td>
                <td>0 (0)</td>
                <td colspan="2">—</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>23</td>
                <td>42 (1.60)</td>
                <td>24 (0.91)</td>
                <td>43 (1.63)</td>
                <td colspan="2">0.366</td>
                <td>0.398</td>
              </tr>
              <tr valign="top">
                <td>24</td>
                <td>4 (0.15)</td>
                <td>6 (0.23)</td>
                <td>3 (0.11)</td>
                <td colspan="2">0.599</td>
                <td>–0.010</td>
              </tr>
              <tr valign="top">
                <td>25</td>
                <td>0 (0)</td>
                <td>2 (0.08)</td>
                <td>0 (0)</td>
                <td colspan="2">—</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>None</td>
                <td>0 (0)</td>
                <td>65 (2.47)</td>
                <td>74 (2.81)</td>
                <td colspan="2">—</td>
                <td>—</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table6fn1">
              <p><sup>a</sup>The average κ values are calculated using arithmetic averaging of the MDC categories, excluding the blank cells.</p>
            </fn>
            <fn id="table6fn2">
              <p><sup>b</sup>HAN: hierarchical attention network.</p>
            </fn>
            <fn id="table6fn3">
              <p><sup>c</sup>Not applicable.</p>
            </fn>
            <fn id="table6fn4">
              <p><sup>d</sup>The κ value falls within the range of 0.61 to 0.80, indicating substantial reliability.</p>
            </fn>
            <fn id="table6fn5">
              <p><sup>e</sup>The κ value falls within the range of 0.81 to 1.00, indicating almost perfect reliability.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <p>Our analysis revealed that the CCS did not assign any cases to pre-MDC, MDC 19, MDC 20, and MDC 25 for the curated patients. By contrast, HAN erroneously assigned 23 (0.87%) of the 2632 cases to MDC 19, 2 (0.08%) cases to MDC 20, and 2 (0.08%) cases to MDC 25. GPT-2 made fewer mistakes, assigning 3 (0.11%) of the 2632 cases to MDC 19 and 2 (0.08%) cases to MDC 20. Using SPSS (version 19; IBM Corp), the outputs of both models, along with the coding results manually curated by the CCS, underwent Cohen κ evaluation for agreement. The results indicated that the average κ value for GPT-2 (0.714/Substantial) was approximately 12.2 percentage points higher than that of HAN (0.592/Moderate). Notably, the κ value of GPT-2 exceeded 0.81 (highlighted in italics in <xref ref-type="table" rid="table6">Table 6</xref>) in 6 categories of the MDC, indicating almost perfect agreement between the CCS and GPT-2. These observations exhibit the effectiveness of NLP-driven systems in supporting the work of CCSs. It is worth mentioning that the κ value of GPT-2 in MDC 24 (pertaining to injuries involving 2 or more body systems) was recorded as –0.01 (poor). A close examination of the 7 cases of the compiled data set in MDC 24 reveals that while 6 (86%) cases shared consistent principal diagnoses, discrepancies arose in the secondary diagnoses. These discrepancies involved instances of overcoding for lung contusion and traumatic hemothorax, as well as undercoding for hepatic contusion and cervical spinal cord injury. Consequently, these inconsistencies resulted in a complete mismatch between the MDC results of GPT-2 and CCS-curated results.</p>
      </sec>
      <sec>
        <title>The Effectiveness of Applying Models Acquired With Biomedical Knowledge</title>
        <p>We conducted an ablation study comparing the performance of 2 fine-tuned GPT-2 models: one fine-tuned on PubMed documents and the other being the original GPT-2 model released by OpenAI. This study aimed to assess the necessity of using models fine-tuned on biomedical documents. The results indicate that the PubMed fine-tuned model achieves slightly better performance than the original GPT-2 model, with <italic>F</italic><sub>1</sub>-score improvements of 0.05 for full coding and 0.03 for main coding.</p>
        <p>In addition, we used the following prompt template to assess the knowledge of the 2 GPT-2 models for the tail-50 ICD codes.</p>
        <disp-formula>“&#60;<italic>ICD-10-CM</italic> Code&#62; &#60; <italic>ICD-10-CM</italic> Description&#62; The condition involves...”</disp-formula>
        <p>From the generated texts, we observed that both GPT-2 models generally possess a considerable understanding of <italic>ICD-10-CM</italic> terms. For instance, when prompted with the <italic>ICD-10-CM</italic> code “C9502: Acute leukemia of unspecified cell type, in relapse,” the GPT-2 model generated the following response: “This condition involves erythroid and myeloid cells and is associated with a poor prognosis.” This response accurately describes a challenging scenario in leukemia management, where the disease has relapsed with the involvement of both erythroid and myeloid cell lines, often indicating a poor prognosis.</p>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Error Analysis</title>
        <p>Ji et al [<xref ref-type="bibr" rid="ref35">35</xref>] discussed several challenges encountered during the implementation of automated clinical coding procedures. One significant challenge is that electronic health records often contain a variety of professional medical vocabularies alongside noisy information, including nonstandard synonyms and misspellings. To address this issue, our study used a variation of GPT-2 fine-tuned using 0.5 million PubMed abstracts to enhance its ability to recognize medical terminologies. The observations from the prompting results, as described in the Methods section, highlight the biomedical knowledge acquired by the pretrained GPT-2 model. This enhancement enabled the GPT-2 model to perform better than the HAN model, which was not pretrained on biomedical data, particularly in categories, such as XIX-T, VII, VIII, and XV, where training instances were limited. Our analysis also reveals that HAN tends to attend to noisy information and sometimes generates completely irrelevant codes, which can be frustrating for CCSs. The result underscores the GPT-2 model’s adeptness in comprehending medical context and its effectiveness in mitigating the challenges associated with clinical coding procedures.</p>
        <p>Another problem highlighted by Ji et al [<xref ref-type="bibr" rid="ref35">35</xref>] is the high-dimensionality of medical codes and the long-tailed distribution, which results in limited corresponding training instances that are necessary for effective model training. Assigning codes to EMRs associated with multiple diagnoses is a particularly complex task, known as an extreme multi-label classification problem and characterized by a vast label set, which was also encountered by our HAN model. In our implementation of HAN, the high-dimensional label space comprises 11,653 codes, magnifying the complexity of the task. On the contrary, while the high-dimensionality issue does not significantly impact the GPT-2 model, the scarcity of training instances still presents a considerable challenge. For instance, codes associated with rare congenital conditions, such as those in category XVII (congenital malformations, deformations, and chromosomal abnormalities), are less frequent in the data set. This rarity results in fewer cases for the models to learn from, thereby hindering accurate predictions. In addition, we noted that certain codes manually labeled by CCSs, such as D489, K5100, Z8616, and others, were not present in the training set. Consequently, both models struggled to predict these unlearned codes, illustrating a critical limitation in the presented training data.</p>
        <p>In addition, there are categories with abundant training instances where both models’ performance remains unsatisfactory. Notably, codes from category XVIII appear in 23.35% of the training set summaries, yet the <italic>F</italic><sub>1</sub>-scores for both models are below 0.65. Upon analyzing the predicted results, we observed that while codes from the chapter “symptoms, signs, and abnormal clinical and laboratory findings” frequently appear in discharge summaries, disease classification rules often do not require separate coding for symptoms and signs that are related to a diagnosed disease. This discrepancy can lead to confusion for the developed models, causing them to misinterpret these entries.</p>
        <p>To tackle this issue, we intend to augment the training set by incorporating additional coded content curated by the CCS in future iterations. This will enable our system to learn novel coding content that the system did not previously come across. Moreover, we plan to explore incorporating ICD code representation methods, as proposed by Vu et al [<xref ref-type="bibr" rid="ref36">36</xref>] and Wu et al [<xref ref-type="bibr" rid="ref37">37</xref>], into our models. By integrating these methods, we aim to further enhance our system’s performance and robustness in addressing the challenges associated with automated clinical coding.</p>
        <p>Moreover, although models with neural attentions learned to infer implicit relationships in discharge summaries by interpreting contextual expressions with weighted attentions, there are instances where pertinent information required for the coding judgment criteria is absent from the discharge summaries, for example, for chapters XIX and XX (injury, poisoning, and certain other consequences of external causes and external causes of morbidity and mortality). According to the classification rules, codes from these chapters are often used in conjunction with other codes. The data sources for these codes are not limited to discharge summaries but also include nursing records, imaging reports, and emergency department records. The developed models’ poor performance in these chapters may be due to the complexity and the need to integrate information from various sources, which is unavailable in the current implementation.</p>
        <p>Finally, we noticed that certain codes only existed in the test set. These scenarios can lead to incorrect <italic>ICD-10-CM</italic> codes generated by the model. One such example is given below.</p>
        <p>In the discharge diagnosis, the developed system predicted the <italic>ICD-10-CM</italic> code for “# Heart failure” as I509:</p>
        <disp-quote>
          <p># Dyspnea with desaturation, focus on HAP (hospital-acquired pneumonia) and coronavirus disease of 2019 # Sepsis, focus on HAP (hospital-acquired pneumonia) and catheter related UTI (urinary tract infection) # Hyponatremia # Heart failure.</p>
        </disp-quote>
        <p>However, the correct coding is I5020. This discrepancy is primarily due to supplementary information recorded in the medical history on admission, specifically mentioning “# chronic systolic heart failure.” Following discussions with the CCS, it was agreed that future enhancements will attempt to merge the content from the history on admission to enable the model to learn from a broader range of medical history information, thus ensuring more accurate coding outcomes.</p>
      </sec>
      <sec>
        <title>Comparison With Prior Work</title>
        <p>Li et al [<xref ref-type="bibr" rid="ref38">38</xref>] introduced the DeepLabeler, a deep learning architecture based on a combination of the convolutional neural network (CNN) with the document-to-vector technique [<xref ref-type="bibr" rid="ref39">39</xref>] to extract and encode local and global features for <italic>ICD-9</italic> coding. Their approach achieved micro <italic>F</italic>-measures of 0.335 and 0.408 in the public multiparameter intelligent monitoring in intensive care (MIMIC)-II and MIMIC-III data sets, respectively. Zeng et al [<xref ref-type="bibr" rid="ref40">40</xref>] transferred the knowledge learned from the Medical Subject Headings indexing domain using the large-scale biomedical semantic indexing competition challenge data set [<xref ref-type="bibr" rid="ref41">41</xref>] to enhance the performance of the developed multi-scale CNN for automatic <italic>ICD-9</italic> coding. Their approach achieved a micro <italic>F</italic>-measure of 0.420 on the public MIMIC-III data set. Chen et al [<xref ref-type="bibr" rid="ref30">30</xref>] used diagnostic records from the National Taiwan University Hospital to build a data set with a total of 1,043,124 labels (using 14,602 unique codes as prediction candidates) and developed a deep neural network classification model based on the bidirectional gated recurrent unit along with the BERT-based word representation method. This model obtained an <italic>F</italic>-measure of 0.715 for <italic>ICD-10-CM</italic> coding on their test set. Wu et al [<xref ref-type="bibr" rid="ref37">37</xref>] proposed a pseudo label-wise attention mechanism aimed at automatically combining attention modes of similar ICD codes to tackle the issue of unbalanced <italic>multi-label</italic> classification in ICD coding. Their methodology involved using a bidirectional long short-term memory in tandem with the pseudo label-wise attention mechanism to represent EMRs as vectors. 
They then used a BERT-based pretrained model to determine the vector representations of the <italic>ICD-10</italic> codes. Finally, they calculated the similarity between EMR vectors and ICD vectors to determine the assigned codes. This approach yielded a micro <italic>F</italic>-measure of 0.806 on their private Chinese Xiangya data sets. In a separate study, Bhutto et al [<xref ref-type="bibr" rid="ref42">42</xref>] proposed a deep recurrent-CNN architecture with a lambda-scaled attention module. Their approach yielded micro <italic>F</italic>-measures of 0.862 and 0.705 on a private data set of clinical notes from Pakistan. In comparison with these studies, the GPT-2 model in our research demonstrates competitive performance, particularly excelling in cases where the training data for specific categories is limited, showcasing its robustness in handling diverse and challenging scenarios.</p>
        <p>In addition, certain previous research also explored the impact of erroneous <italic>ICD-10-CM</italic> coding on hospital finances. Zafirah et al [<xref ref-type="bibr" rid="ref43">43</xref>] studied the potential loss attributable to clinical coding errors in a Malaysian teaching hospital. Their findings indicate a high prevalence of coding errors in medical records, particularly concerning secondary diagnosis codes, which reached 81.3% (377/464). The estimated financial impact on the medical discipline in this hospital amounted to a potential profit loss of RM 85,804.92 (US $19,617.05). Toner et al [<xref ref-type="bibr" rid="ref15">15</xref>] conducted a retrospective comparative analysis of case records for patients with the M966 diagnosis code (periprosthetic fracture) in a district general hospital. Their work revealed that <italic>ICD-10-CM</italic> coding errors resulted in a loss of £25,000 (US $33,029.97) when compared to the actual hospital revenue. In contrast with relevant studies, our examination of the consistency assessment of the autocoding system and CCS coding indicates promising prospects in reducing manual workload and providing coding references to minimize human errors. The error rate of CCS detected with the assistance of the AI-assisted coding system is 1.9% (50/2632; number of coding errors/total cases).</p>
        <p>Pivotally, our work serves as the first endeavor to examine the feasibility of the combination of <italic>ICD-10</italic> coding with DRGs in the real hospital environment, which indicates a significant advancement in the area. In conclusion, the implementation of NLP-driven AI-assisted coding systems contributes to a reduction in CCS coding errors and manual workload, thereby enhancing the overall efficiency of the coding process, lowering error rates, and mitigating financial losses.</p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>While our study demonstrates the potential of NLP-driven AI-assisted coding systems in improving <italic>ICD-10-CM</italic> coding accuracy and efficiency, several limitations should be acknowledged. First, the data set used for this study was sourced from a single hospital, which may limit the generalizability of the presented results. Future studies should include data from multiple hospitals, covering diverse geographical regions and varying patient demographics, to ensure broader applicability of the findings and to validate the robustness of the models across different settings.</p>
        <p>Second, this study involved only 1 senior CCS to conduct the κ analysis for the agreements among the developed models. This limitation means that the results may not necessarily extend to other CCSs, particularly those with different levels of experience. Future works should consider recruiting a larger and more diverse group of CCSs, including both senior and junior coders, to evaluate the helpfulness of the proposed models comprehensively. In addition, studying the reduction in coding time and the impact on workflow efficiency in practical settings would provide valuable insights into the real-world benefits of AI-assisted coding systems.</p>
        <p>Third, this study concentrated solely on Tw-DRGs, which are specific to Taiwan’s health care system. As a result, the findings may not be directly generalizable to other DRG systems used in different countries or regions, or to DRGs estimated in different periods. Further research is needed to ascertain the applicability of our conclusions to other DRG systems worldwide. Investigating the performance of the models in different international contexts and updating the models to reflect changes in DRG systems over time would enhance the relevance and utility of the findings.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>In summary, our study has demonstrated the effectiveness of deploying NLP-driven AI-assisted coding systems, specifically using HAN and GPT-2 models, in the context of <italic>ICD-10-CM</italic> coding and the Tw-DRGs process. The comparative analysis revealed that GPT-2 consistently performed better than HAN and exhibited higher precision, recall, and <italic>F</italic><sub>1</sub>-scores. This superior performance was particularly evident in scenarios with limited data, highlighting the robustness of GPT-2 in extracting vital features from discharge summaries. Moreover, the evaluation of principal diagnosis and MDC in Tw-DRGs showcased the utility of the developed models. GPT-2, in particular, acquired higher agreement values and made fewer mistakes in MDC classification. In the practical deployment of the system within a hospital environment, the comparative analysis with CCS validated the potential of NLP-driven autocoding in the Tw-DRGs process. Despite encountering certain discrepancies, our study demonstrates the significant value of the implemented models as tools that offer insights and support to CCSs during the coding process. The deployment of AI-assisted coding systems has the potential to enhance coding accuracy while simultaneously reducing manual workload, leading to improved process efficiency, lower error rates, and ultimately, a decrease in financial losses.</p>
        <p>While our proposed system effectively alleviates the manual workload of CCSs, our error analysis has also unveiled notable challenges. These include the absence of coding judgment information in discharge summaries, the presence of coding answers not included in the training set, and the need for suggesting 1 or more main codes for the development of an <italic>ICD-10</italic> coding system to assist in the DRG process. These findings underscore areas for further improvement and refinement in future iterations of our system. Addressing these challenges will be pivotal in enhancing the efficacy and reliability of automated coding systems, thereby maximizing their potential to support and streamline both clinical coding and DRG processes.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Supplementary data for the process of Taiwan diagnosis related groups.</p>
        <media xlink:href="jmir_v26i1e58278_app1.docx" xlink:title="DOCX File , 323 KB"/>
      </supplementary-material>
      <supplementary-material id="app2">
        <label>Multimedia Appendix 2</label>
        <p>Supplementary data for the top 50 International Classification of Diseases, Tenth Revision, Clinical Modification, codes.</p>
        <media xlink:href="jmir_v26i1e58278_app2.docx" xlink:title="DOCX File , 15 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">AI</term>
          <def>
            <p>artificial intelligence</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">BERT</term>
          <def>
            <p>bidirectional encoder representations from transformers</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">CCS</term>
          <def>
            <p>certified coding specialist</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">CNN</term>
          <def>
            <p>convolutional neural network</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">DRG</term>
          <def>
            <p>diagnosis related group</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">EMR</term>
          <def>
            <p>electronic medical record</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">HAN</term>
          <def>
            <p>hierarchical attention network</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">ICD</term>
          <def>
            <p>International Classification of Diseases</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">ICD-9</term>
          <def>
            <p>International Classification of Diseases, Ninth Revision</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb10">ICD-10-CM</term>
          <def>
            <p>International Classification of Diseases, Tenth Revision, Clinical Modification</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb11">KMUCHH</term>
          <def>
            <p>Kaohsiung Medical University Chung-Ho Memorial Hospital</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb12">MDC</term>
          <def>
            <p>major diagnostic category</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb13">MIMIC</term>
          <def>
            <p>multiparameter intelligent monitoring in intensive care</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb14">NHIA</term>
          <def>
            <p>National Health Insurance Administration</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb15">NLP</term>
          <def>
            <p>natural language processing</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb16">Tw-DRG</term>
          <def>
            <p>Taiwan diagnosis related group</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <fn-group>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="web">
          <article-title>International classification of diseases:[9th] ninth revision, basic tabulation list with alphabetic index</article-title>
          <source>World Health Organization</source>
          <year>1978</year>
          <access-date>2024-04-29</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://iris.who.int/handle/10665/39473">https://iris.who.int/handle/10665/39473</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sivashankaran</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Borsi</surname>
              <given-names>JP</given-names>
            </name>
            <name name-style="western">
              <surname>Yoho</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Have ICD-10 coding practices changed since 2015?</article-title>
          <source>AMIA Annu Symp Proc</source>
          <year>2019</year>
          <volume>2019</volume>
          <fpage>804</fpage>
          <lpage>11</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/32308876"/>
          </comment>
          <pub-id pub-id-type="medline">32308876</pub-id>
          <pub-id pub-id-type="pmcid">PMC7153097</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>de Lissovoy</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Codes, coding, and COVID-19</article-title>
          <source>Med Care</source>
          <year>2020</year>
          <month>12</month>
          <volume>58</volume>
          <issue>12</issue>
          <fpage>1035</fpage>
          <lpage>6</lpage>
          <pub-id pub-id-type="doi">10.1097/MLR.0000000000001430</pub-id>
          <pub-id pub-id-type="medline">33003050</pub-id>
          <pub-id pub-id-type="pii">00005650-202012000-00001</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Steindel</surname>
              <given-names>SJ</given-names>
            </name>
          </person-group>
          <article-title>International classification of diseases, 10th edition, clinical modification and procedure coding system: descriptive overview of the next generation HIPAA code sets</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2010</year>
          <month>05</month>
          <day>01</day>
          <volume>17</volume>
          <issue>3</issue>
          <fpage>274</fpage>
          <lpage>82</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/20442144"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/jamia.2009.001230</pub-id>
          <pub-id pub-id-type="medline">20442144</pub-id>
          <pub-id pub-id-type="pii">17/3/274</pub-id>
          <pub-id pub-id-type="pmcid">PMC2995704</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mills</surname>
              <given-names>RE</given-names>
            </name>
            <name name-style="western">
              <surname>Butler</surname>
              <given-names>RR</given-names>
            </name>
            <name name-style="western">
              <surname>McCullough</surname>
              <given-names>EC</given-names>
            </name>
            <name name-style="western">
              <surname>Bao</surname>
              <given-names>MZ</given-names>
            </name>
            <name name-style="western">
              <surname>Averill</surname>
              <given-names>RF</given-names>
            </name>
          </person-group>
          <article-title>Impact of the transition to ICD-10 on Medicare inpatient hospital payments</article-title>
          <source>Medicare Medicaid Res Rev</source>
          <year>2011</year>
          <month>06</month>
          <day>06</day>
          <volume>1</volume>
          <issue>2</issue>
          <fpage>E1</fpage>
          <lpage>13</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/22340773"/>
          </comment>
          <pub-id pub-id-type="doi">10.5600/mmrr.001.02.a02</pub-id>
          <pub-id pub-id-type="medline">22340773</pub-id>
          <pub-id pub-id-type="pmcid">PMC4010447</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>LM</given-names>
            </name>
            <name name-style="western">
              <surname>Thacker</surname>
              <given-names>SB</given-names>
            </name>
          </person-group>
          <article-title>Public health surveillance and knowing about health in the context of growing sources of health data</article-title>
          <source>Am J Prev Med</source>
          <year>2011</year>
          <month>12</month>
          <volume>41</volume>
          <issue>6</issue>
          <fpage>636</fpage>
          <lpage>40</lpage>
          <pub-id pub-id-type="doi">10.1016/j.amepre.2011.08.015</pub-id>
          <pub-id pub-id-type="medline">22099242</pub-id>
          <pub-id pub-id-type="pii">S0749-3797(11)00674-X</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Uzkuraitis</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Hastings</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Torney</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Casemix funding optimisation: working together to make the most of every episode</article-title>
          <source>Health Inf Manag</source>
          <year>2010</year>
          <month>10</month>
          <day>01</day>
          <volume>39</volume>
          <issue>3</issue>
          <fpage>47</fpage>
          <lpage>9</lpage>
          <pub-id pub-id-type="doi">10.1177/183335831003900309</pub-id>
          <pub-id pub-id-type="medline">28683680</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ho</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Guilcher</surname>
              <given-names>SJ</given-names>
            </name>
            <name name-style="western">
              <surname>McKenzie</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Mouneimne</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Williams</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Voth</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Cronin</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Noonan</surname>
              <given-names>VK</given-names>
            </name>
            <name name-style="western">
              <surname>Jaglal</surname>
              <given-names>SB</given-names>
            </name>
          </person-group>
          <article-title>Validation of algorithm to identify persons with non-traumatic spinal cord dysfunction in Canada using administrative health data</article-title>
          <source>Top Spinal Cord Inj Rehabil</source>
          <year>2017</year>
          <month>10</month>
          <volume>23</volume>
          <issue>4</issue>
          <fpage>333</fpage>
          <lpage>42</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/29339909"/>
          </comment>
          <pub-id pub-id-type="doi">10.1310/sci2304-333</pub-id>
          <pub-id pub-id-type="medline">29339909</pub-id>
          <pub-id pub-id-type="pmcid">PMC5667430</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Shivade</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Raghavan</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Fosler-Lussier</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Embi</surname>
              <given-names>PJ</given-names>
            </name>
            <name name-style="western">
              <surname>Elhadad</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Johnson</surname>
              <given-names>SB</given-names>
            </name>
            <name name-style="western">
              <surname>Lai</surname>
              <given-names>AM</given-names>
            </name>
          </person-group>
          <article-title>A review of approaches to identifying patient phenotype cohorts using electronic health records</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2014</year>
          <month>03</month>
          <day>01</day>
          <volume>21</volume>
          <issue>2</issue>
          <fpage>221</fpage>
          <lpage>30</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/24201027"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/amiajnl-2013-001935</pub-id>
          <pub-id pub-id-type="medline">24201027</pub-id>
          <pub-id pub-id-type="pii">amiajnl-2013-001935</pub-id>
          <pub-id pub-id-type="pmcid">PMC3932460</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liao</surname>
              <given-names>KP</given-names>
            </name>
            <name name-style="western">
              <surname>Cai</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Savova</surname>
              <given-names>GK</given-names>
            </name>
            <name name-style="western">
              <surname>Murphy</surname>
              <given-names>SN</given-names>
            </name>
            <name name-style="western">
              <surname>Karlson</surname>
              <given-names>EW</given-names>
            </name>
            <name name-style="western">
              <surname>Ananthakrishnan</surname>
              <given-names>AN</given-names>
            </name>
            <name name-style="western">
              <surname>Gainer</surname>
              <given-names>VS</given-names>
            </name>
            <name name-style="western">
              <surname>Shaw</surname>
              <given-names>SY</given-names>
            </name>
            <name name-style="western">
              <surname>Xia</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Szolovits</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Churchill</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kohane</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>Development of phenotype algorithms using electronic medical records and incorporating natural language processing</article-title>
          <source>BMJ</source>
          <year>2015</year>
          <month>04</month>
          <day>24</day>
          <volume>350</volume>
          <issue>apr24 11</issue>
          <fpage>h1885</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/25911572"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/bmj.h1885</pub-id>
          <pub-id pub-id-type="medline">25911572</pub-id>
          <pub-id pub-id-type="pmcid">PMC4707569</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Banda</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Seneviratne</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Hernandez-Boussard</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Shah</surname>
              <given-names>NH</given-names>
            </name>
          </person-group>
          <article-title>Advances in electronic phenotyping: from rule-based definitions to machine learning models</article-title>
          <source>Annu Rev Biomed Data Sci</source>
          <year>2018</year>
          <month>07</month>
          <day>20</day>
          <volume>1</volume>
          <issue>1</issue>
          <fpage>53</fpage>
          <lpage>68</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/31218278"/>
          </comment>
          <pub-id pub-id-type="doi">10.1146/annurev-biodatasci-080917-013315</pub-id>
          <pub-id pub-id-type="medline">31218278</pub-id>
          <pub-id pub-id-type="pmcid">PMC6583807</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>JJ</given-names>
            </name>
          </person-group>
          <article-title>Implementation and outcome of Taiwan diagnosis-related group (DRG) payment system</article-title>
          <source>Georgia State University</source>
          <year>2015</year>
          <access-date>2024-04-29</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://scholarworks.gsu.edu/iph_theses/357/">https://scholarworks.gsu.edu/iph_theses/357/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>CY</given-names>
            </name>
            <name name-style="western">
              <surname>Chien</surname>
              <given-names>LC</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>CC</given-names>
            </name>
            <name name-style="western">
              <surname>Ma</surname>
              <given-names>HM</given-names>
            </name>
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>RH</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>CL</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Original article: the impacts of DRG payment system on financial balance of multiple trauma: experiences of three trauma centers in Taiwan</article-title>
          <source>Injury</source>
          <year>2023</year>
          <month>09</month>
          <volume>54</volume>
          <issue>9</issue>
          <fpage>110703</fpage>
          <pub-id pub-id-type="doi">10.1016/j.injury.2023.03.031</pub-id>
          <pub-id pub-id-type="medline">37045657</pub-id>
          <pub-id pub-id-type="pii">S0020-1383(23)00287-5</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ayub</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Scali</surname>
              <given-names>ST</given-names>
            </name>
            <name name-style="western">
              <surname>Richter</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Huber</surname>
              <given-names>TS</given-names>
            </name>
            <name name-style="western">
              <surname>Beck</surname>
              <given-names>AW</given-names>
            </name>
            <name name-style="western">
              <surname>Fatima</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Berceli</surname>
              <given-names>SA</given-names>
            </name>
            <name name-style="western">
              <surname>Upchurch</surname>
              <given-names>GR</given-names>
            </name>
            <name name-style="western">
              <surname>Arnaoutakis</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Back</surname>
              <given-names>MR</given-names>
            </name>
            <name name-style="western">
              <surname>Giles</surname>
              <given-names>KA</given-names>
            </name>
          </person-group>
          <article-title>Financial implications of coding inaccuracies in patients undergoing elective endovascular abdominal aortic aneurysm repair</article-title>
          <source>J Vasc Surg</source>
          <year>2019</year>
          <month>01</month>
          <volume>69</volume>
          <issue>1</issue>
          <fpage>210</fpage>
          <lpage>8</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S0741-5214(18)30982-0"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jvs.2018.04.027</pub-id>
          <pub-id pub-id-type="medline">29937283</pub-id>
          <pub-id pub-id-type="pii">S0741-5214(18)30982-0</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Toner</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Khaled</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Ramesh</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Qureshi</surname>
              <given-names>MK</given-names>
            </name>
            <name name-style="western">
              <surname>Al Suyyagh</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Dunkow</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Financial impact of inaccurate coding plus cost-effectiveness analysis for surgically managed patients with periprosthetic fractures</article-title>
          <source>Cureus</source>
          <year>2021</year>
          <month>02</month>
          <day>01</day>
          <volume>13</volume>
          <issue>2</issue>
          <fpage>e13060</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/33680602"/>
          </comment>
          <pub-id pub-id-type="doi">10.7759/cureus.13060</pub-id>
          <pub-id pub-id-type="medline">33680602</pub-id>
          <pub-id pub-id-type="pmcid">PMC7929546</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>O'Malley</surname>
              <given-names>KJ</given-names>
            </name>
            <name name-style="western">
              <surname>Cook</surname>
              <given-names>KF</given-names>
            </name>
            <name name-style="western">
              <surname>Price</surname>
              <given-names>MD</given-names>
            </name>
            <name name-style="western">
              <surname>Wildes</surname>
              <given-names>KR</given-names>
            </name>
            <name name-style="western">
              <surname>Hurdle</surname>
              <given-names>JF</given-names>
            </name>
            <name name-style="western">
              <surname>Ashton</surname>
              <given-names>CM</given-names>
            </name>
          </person-group>
          <article-title>Measuring diagnoses: ICD code accuracy</article-title>
          <source>Health Serv Res</source>
          <year>2005</year>
          <month>10</month>
          <day>11</day>
          <volume>40</volume>
          <issue>5 Pt 2</issue>
          <fpage>1620</fpage>
          <lpage>39</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/16178999"/>
          </comment>
          <pub-id pub-id-type="doi">10.1111/j.1475-6773.2005.00444.x</pub-id>
          <pub-id pub-id-type="medline">16178999</pub-id>
          <pub-id pub-id-type="pii">HESR444</pub-id>
          <pub-id pub-id-type="pmcid">PMC1361216</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kaur</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Distributed knowledge based clinical auto-coding system</article-title>
          <source>Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics: Student Research Workshop</source>
          <year>2019</year>
          <conf-name>ACL '19</conf-name>
          <conf-date>July 28-August 2, 2019</conf-date>
          <conf-loc>Florence, Italy</conf-loc>
          <fpage>1</fpage>
          <lpage>9</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/P19-2001.pdf"/>
          </comment>
          <pub-id pub-id-type="doi">10.18653/v1/p19-2001</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Donaldson</surname>
              <given-names>MS</given-names>
            </name>
            <name name-style="western">
              <surname>Corrigan</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Kohn</surname>
              <given-names>LT</given-names>
            </name>
          </person-group>
          <source>To Err is Human: Building a Safer Health System</source>
          <year>2000</year>
          <publisher-loc>Washington, DC</publisher-loc>
          <publisher-name>National Academies Press</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yeow</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Ng</surname>
              <given-names>PK</given-names>
            </name>
            <name name-style="western">
              <surname>Tan</surname>
              <given-names>KS</given-names>
            </name>
            <name name-style="western">
              <surname>Chin</surname>
              <given-names>TS</given-names>
            </name>
            <name name-style="western">
              <surname>Lim</surname>
              <given-names>WY</given-names>
            </name>
          </person-group>
          <article-title>Effects of stress, repetition, fatigue and work environment on human error in manufacturing industries</article-title>
          <source>J Appl Sci</source>
          <year>2014</year>
          <month>12</month>
          <day>01</day>
          <volume>14</volume>
          <issue>24</issue>
          <fpage>3464</fpage>
          <lpage>71</lpage>
          <pub-id pub-id-type="doi">10.3923/JAS.2014.3464.3471</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="web">
          <article-title>International statistical classification of diseases and related health problems: alphabetical index</article-title>
          <source>World Health Organization</source>
          <year>2004</year>
          <access-date>2024-04-29</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.who.int/standards/classifications/classification-of-diseases">https://www.who.int/standards/classifications/classification-of-diseases</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>NW</given-names>
            </name>
            <name name-style="western">
              <surname>Dai</surname>
              <given-names>HJ</given-names>
            </name>
            <name name-style="western">
              <surname>Jonnagaddala</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>CW</given-names>
            </name>
            <name name-style="western">
              <surname>Tsai</surname>
              <given-names>RT</given-names>
            </name>
            <name name-style="western">
              <surname>Hsu</surname>
              <given-names>WL</given-names>
            </name>
          </person-group>
          <article-title>A context-aware approach for progression tracking of medical concepts in electronic medical records</article-title>
          <source>J Biomed Inform</source>
          <year>2015</year>
          <month>12</month>
          <volume>58</volume>
          <issue>Suppl</issue>
          <fpage>S150</fpage>
          <lpage>7</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(15)00207-5"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2015.09.013</pub-id>
          <pub-id pub-id-type="medline">26432355</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(15)00207-5</pub-id>
          <pub-id pub-id-type="pmcid">PMC4977838</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Dyer</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>He</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Smola</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Hovy</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Hierarchical attention networks for document classification</article-title>
          <source>Proceedings of the 2016 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies</source>
          <year>2016</year>
          <conf-name>NAACL '16</conf-name>
          <conf-date>June 12-17, 2016</conf-date>
          <conf-loc>San Diego, CA</conf-loc>
          <fpage>1480</fpage>
          <lpage>9</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/N16-1174.pdf"/>
          </comment>
          <pub-id pub-id-type="doi">10.18653/v1/n16-1174</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Radford</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Child</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Luan</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Amodei</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Sutskever</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>Language models are unsupervised multitask learners</article-title>
          <source>OpenAI blog</source>
          <year>2019</year>
          <volume>1</volume>
          <issue>8</issue>
          <fpage>9</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://d4mucfpksywv.cloudfront.net/better-language-models/language_models_are_unsupervised_multitask_learners.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dong</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Suárez-Paniagua</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Whiteley</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Explainable automated coding of clinical notes using hierarchical label-wise attention networks and label embedding initialisation</article-title>
          <source>J Biomed Inform</source>
          <year>2021</year>
          <month>04</month>
          <volume>116</volume>
          <fpage>103728</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(21)00057-5"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2021.103728</pub-id>
          <pub-id pub-id-type="medline">33711543</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(21)00057-5</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Teng</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Song</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Han</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>A survey on the interpretability of deep learning in medical diagnosis</article-title>
          <source>Multimed Syst</source>
          <year>2022</year>
          <month>06</month>
          <day>25</day>
          <volume>28</volume>
          <issue>6</issue>
          <fpage>2335</fpage>
          <lpage>55</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/35789785"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s00530-022-00960-4</pub-id>
          <pub-id pub-id-type="medline">35789785</pub-id>
          <pub-id pub-id-type="pii">960</pub-id>
          <pub-id pub-id-type="pmcid">PMC9243744</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Tan</surname>
              <given-names>TF</given-names>
            </name>
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Thirunavukarasu</surname>
              <given-names>AJ</given-names>
            </name>
            <name name-style="western">
              <surname>Ting</surname>
              <given-names>DS</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>Large language models in health care: development, applications, and challenges</article-title>
          <source>Health Care Sci</source>
          <year>2023</year>
          <month>08</month>
          <day>24</day>
          <volume>2</volume>
          <issue>4</issue>
          <fpage>255</fpage>
          <lpage>63</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/38939520"/>
          </comment>
          <pub-id pub-id-type="doi">10.1002/hcs2.61</pub-id>
          <pub-id pub-id-type="medline">38939520</pub-id>
          <pub-id pub-id-type="pii">HCS261</pub-id>
          <pub-id pub-id-type="pmcid">PMC11080827</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pennington</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Socher</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Manning</surname>
              <given-names>CD</given-names>
            </name>
          </person-group>
          <article-title>GloVe: global vectors for word representation</article-title>
          <source>Proceedings of the 2014 Conference on Empirical Methods in Natural Language Processing</source>
          <year>2014</year>
          <conf-name>EMNLP '14</conf-name>
          <conf-date>October 25-29, 2014</conf-date>
          <conf-loc>Doha, Qatar</conf-loc>
          <fpage>1532</fpage>
          <lpage>43</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/D14-1162.pdf"/>
          </comment>
          <pub-id pub-id-type="doi">10.3115/v1/d14-1162</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Papanikolaou</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Pierleoni</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>DARE: data augmented relation extraction with GPT-2</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online April 6, 2020</comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2004.13845"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Devlin</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>MW</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Toutanova</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>BERT: pre-training of deep bidirectional transformers for language understanding</article-title>
          <source>Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies</source>
          <year>2019</year>
          <conf-name>NAACL-HLT '19</conf-name>
          <conf-date>June 2-7, 2019</conf-date>
          <conf-loc>Minneapolis, MN</conf-loc>
          <fpage>4171</fpage>
          <lpage>86</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/N19-1423.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>PF</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>SM</given-names>
            </name>
            <name name-style="western">
              <surname>Liao</surname>
              <given-names>WC</given-names>
            </name>
            <name name-style="western">
              <surname>Kuo</surname>
              <given-names>LC</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>KC</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>YC</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Chiu</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Lai</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Automatic ICD-10 coding and training system: deep neural network based on supervised learning</article-title>
          <source>JMIR Med Inform</source>
          <year>2021</year>
          <month>08</month>
          <day>31</day>
          <volume>9</volume>
          <issue>8</issue>
          <fpage>e23230</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medinform.jmir.org/2021/8/e23230/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/23230</pub-id>
          <pub-id pub-id-type="medline">34463639</pub-id>
          <pub-id pub-id-type="pii">v9i8e23230</pub-id>
          <pub-id pub-id-type="pmcid">PMC8441604</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lai</surname>
              <given-names>CH</given-names>
            </name>
            <name name-style="western">
              <surname>Shian</surname>
              <given-names>BT</given-names>
            </name>
            <name name-style="western">
              <surname>Ke</surname>
              <given-names>CR</given-names>
            </name>
          </person-group>
          <article-title>Demonstration website for the developed NLP-driven AI-assisted ICD-10-CM coding system</article-title>
          <source>ISLAB</source>
          <year>2024</year>
          <access-date>2024-08-22</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://140.127.114.22/Patient/Detail">http://140.127.114.22/Patient/Detail</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kingma</surname>
              <given-names>DP</given-names>
            </name>
            <name name-style="western">
              <surname>Ba</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Adam: a method for stochastic optimization</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online December 22, 2014</comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1412.6980"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Loshchilov</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Hutter</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Decoupled weight decay regularization</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online November 14, 2017</comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1711.05101"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Landis</surname>
              <given-names>JR</given-names>
            </name>
            <name name-style="western">
              <surname>Koch</surname>
              <given-names>GG</given-names>
            </name>
          </person-group>
          <article-title>The measurement of observer agreement for categorical data</article-title>
          <source>Biometrics</source>
          <year>1977</year>
          <month>03</month>
          <volume>33</volume>
          <issue>1</issue>
          <fpage>159</fpage>
          <pub-id pub-id-type="doi">10.2307/2529310</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ji</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Dong</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Taalas</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Pitkänen</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Marttinen</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>A unified review of deep learning for automated medical coding</article-title>
          <source>ACM Comput Surv</source>
          <year>2024</year>
          <month>05</month>
          <day>17</day>
          <fpage>1</fpage>
          <lpage>38</lpage>
          <pub-id pub-id-type="doi">10.1145/3664615</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Vu</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Nguyen</surname>
              <given-names>DQ</given-names>
            </name>
            <name name-style="western">
              <surname>Nguyen</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>A label attention model for ICD coding from clinical text</article-title>
          <source>Proceedings of the 29th International Joint Conference on Artificial Intelligence</source>
          <year>2020</year>
          <conf-name>IJCAI '20</conf-name>
          <conf-date>January 7-15, 2021</conf-date>
          <conf-loc>Yokohama, Japan</conf-loc>
          <fpage>3335</fpage>
          <lpage>41</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dl.acm.org/doi/abs/10.5555/3491440.3491901"/>
          </comment>
          <pub-id pub-id-type="doi">10.24963/ijcai.2020/461</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Zeng</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>A pseudo label-wise attention network for automatic ICD coding</article-title>
          <source>IEEE J Biomed Health Inform</source>
          <year>2022</year>
          <month>10</month>
          <volume>26</volume>
          <issue>10</issue>
          <fpage>5201</fpage>
          <lpage>12</lpage>
          <pub-id pub-id-type="doi">10.1109/jbhi.2022.3193291</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Li</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Fei</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Zeng</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>FX</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Pan</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Automated ICD-9 coding via a deep learning approach</article-title>
          <source>IEEE/ACM Trans Comput Biol Bioinform</source>
          <year>2019</year>
          <volume>16</volume>
          <issue>4</issue>
          <fpage>1193</fpage>
          <lpage>202</lpage>
          <pub-id pub-id-type="doi">10.1109/TCBB.2018.2817488</pub-id>
          <pub-id pub-id-type="medline">29994157</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Le</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Mikolov</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Distributed representations of sentences and documents</article-title>
          <source>Proceedings of the 31st International Conference on Machine Learning</source>
          <year>2014</year>
          <conf-name>ICML '14</conf-name>
          <conf-date>June 21-26, 2014</conf-date>
          <conf-loc>Beijing, China</conf-loc>
          <fpage>1188</fpage>
          <lpage>96</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dl.acm.org/doi/10.5555/3044805.3045025"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zeng</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Fei</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Pan</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Automatic ICD-9 coding via deep transfer learning</article-title>
          <source>Neurocomputing</source>
          <year>2019</year>
          <month>01</month>
          <volume>324</volume>
          <fpage>43</fpage>
          <lpage>50</lpage>
          <pub-id pub-id-type="doi">10.1016/J.NEUCOM.2018.04.081</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tsatsaronis</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Balikas</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Malakasiotis</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Partalas</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Zschunke</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Alvers</surname>
              <given-names>MR</given-names>
            </name>
            <name name-style="western">
              <surname>Weissenborn</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Krithara</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Petridis</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Polychronopoulos</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Almirantis</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Pavlopoulos</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Baskiotis</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Gallinari</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Artiéres</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Ngomo</surname>
              <given-names>AN</given-names>
            </name>
            <name name-style="western">
              <surname>Heino</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Gaussier</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Barrio-Alvers</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Schroeder</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Androutsopoulos</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Paliouras</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>An overview of the BIOASQ large-scale biomedical semantic indexing and question answering competition</article-title>
          <source>BMC Bioinformatics</source>
          <year>2015</year>
          <month>04</month>
          <day>30</day>
          <volume>16</volume>
          <issue>1</issue>
          <fpage>138</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcbioinformatics.biomedcentral.com/articles/10.1186/s12859-015-0564-6"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12859-015-0564-6</pub-id>
          <pub-id pub-id-type="medline">25925131</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12859-015-0564-6</pub-id>
          <pub-id pub-id-type="pmcid">PMC4450488</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bhutto</surname>
              <given-names>SR</given-names>
            </name>
            <name name-style="western">
              <surname>Zeng</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Niu</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Khoso</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Umar</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Lalley</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Automatic ICD-10-CM coding via Lambda-Scaled attention based deep learning model</article-title>
          <source>Methods</source>
          <year>2024</year>
          <month>02</month>
          <volume>222</volume>
          <fpage>19</fpage>
          <lpage>27</lpage>
          <pub-id pub-id-type="doi">10.1016/j.ymeth.2023.11.017</pub-id>
          <pub-id pub-id-type="medline">38141869</pub-id>
          <pub-id pub-id-type="pii">S1046-2023(23)00209-8</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zafirah</surname>
              <given-names>SA</given-names>
            </name>
            <name name-style="western">
              <surname>Nur</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Puteh</surname>
              <given-names>SE</given-names>
            </name>
            <name name-style="western">
              <surname>Aljunid</surname>
              <given-names>SM</given-names>
            </name>
          </person-group>
          <article-title>Potential loss of revenue due to errors in clinical coding during the implementation of the Malaysia diagnosis related group (MY-DRG) Casemix system in a teaching hospital in Malaysia</article-title>
          <source>BMC Health Serv Res</source>
          <year>2018</year>
          <month>01</month>
          <day>25</day>
          <volume>18</volume>
          <issue>1</issue>
          <fpage>38</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmchealthservres.biomedcentral.com/articles/10.1186/s12913-018-2843-1"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12913-018-2843-1</pub-id>
          <pub-id pub-id-type="medline">29370785</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12913-018-2843-1</pub-id>
          <pub-id pub-id-type="pmcid">PMC5784726</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
