<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMIR</journal-id>
      <journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id>
      <journal-title>Journal of Medical Internet Research</journal-title>
      <issn pub-type="epub">1438-8871</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v27i1e67033</article-id>
      <article-id pub-id-type="pmid">40100267</article-id>
      <article-id pub-id-type="doi">10.2196/67033</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Prompt Framework for Extracting Scale-Related Knowledge Entities from Chinese Medical Literature: Development and Evaluation Study</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Schwartz</surname>
            <given-names>Amy</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Zaghir</surname>
            <given-names>Jamil</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Luo</surname>
            <given-names>Guan</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Hao</surname>
            <given-names>Jie</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0002-7208-7443</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Chen</surname>
            <given-names>Zhenli</given-names>
          </name>
          <degrees>BS</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0008-9664-085X</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Peng</surname>
            <given-names>Qinglong</given-names>
          </name>
          <degrees>BS</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0006-2545-1344</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Zhao</surname>
            <given-names>Liang</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-6505-290X</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Zhao</surname>
            <given-names>Wanqing</given-names>
          </name>
          <degrees>MS</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-3705-5737</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author">
          <name name-style="western">
            <surname>Cong</surname>
            <given-names>Shan</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-5981-7678</ext-link>
        </contrib>
        <contrib id="contrib7" contrib-type="author">
          <name name-style="western">
            <surname>Li</surname>
            <given-names>Junlian</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0000-1028-0235</ext-link>
        </contrib>
        <contrib id="contrib8" contrib-type="author">
          <name name-style="western">
            <surname>Li</surname>
            <given-names>Jiao</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-6391-8343</ext-link>
        </contrib>
        <contrib id="contrib9" contrib-type="author">
          <name name-style="western">
            <surname>Qian</surname>
            <given-names>Qing</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-9072-586X</ext-link>
        </contrib>
        <contrib id="contrib10" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Sun</surname>
            <given-names>Haixia</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Institute of Medical Information/Medical Library</institution>
            <institution>Chinese Academy of Medical Sciences &#38; Peking Union Medical College</institution>
            <addr-line>No. 3, Yabao Road</addr-line>
            <addr-line>Chaoyang District</addr-line>
            <addr-line>Beijing, 100020</addr-line>
            <country>China</country>
            <phone>86 01052328741</phone>
            <email>sun.haixia@imicams.ac.cn</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0004-1652-5335</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Institute of Medical Information/Medical Library</institution>
        <institution>Chinese Academy of Medical Sciences &#38; Peking Union Medical College</institution>
        <addr-line>Beijing</addr-line>
        <country>China</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Qingdao Innovation and Development Center</institution>
        <institution>Harbin Engineering University</institution>
        <addr-line>Qingdao</addr-line>
        <country>China</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>College of Intelligent Systems Science and Engineering</institution>
        <institution>Harbin Engineering University</institution>
        <addr-line>Harbin</addr-line>
        <country>China</country>
      </aff>
      <aff id="aff4">
        <label>4</label>
        <institution>Department of Thoracic Surgery</institution>
        <institution>National Cancer Center/National Clinical Research Center for Cancer/Cancer Hospital</institution>
        <institution>Chinese Academy of Medical Sciences &#38; Peking Union Medical College</institution>
        <addr-line>Beijing</addr-line>
        <country>China</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Haixia Sun <email>sun.haixia@imicams.ac.cn</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <year>2025</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>18</day>
        <month>3</month>
        <year>2025</year>
      </pub-date>
      <volume>27</volume>
      <elocation-id>e67033</elocation-id>
      <history>
        <date date-type="received">
          <day>30</day>
          <month>9</month>
          <year>2024</year>
        </date>
        <date date-type="rev-request">
          <day>15</day>
          <month>11</month>
          <year>2024</year>
        </date>
        <date date-type="rev-recd">
          <day>6</day>
          <month>12</month>
          <year>2024</year>
        </date>
        <date date-type="accepted">
          <day>21</day>
          <month>2</month>
          <year>2025</year>
        </date>
      </history>
      <copyright-statement>©Jie Hao, Zhenli Chen, Qinglong Peng, Liang Zhao, Wanqing Zhao, Shan Cong, Junlian Li, Jiao Li, Qing Qian, Haixia Sun. Originally published in the Journal of Medical Internet Research (https://www.jmir.org), 18.03.2025.</copyright-statement>
      <copyright-year>2025</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research (ISSN 1438-8871), is properly cited. The complete bibliographic information, a link to the original publication on https://www.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://www.jmir.org/2025/1/e67033" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Measurement-based care improves patient outcomes by using standardized scales, but its widespread adoption is hindered by the lack of accessible and structured knowledge, particularly in unstructured Chinese medical literature. Extracting scale-related knowledge entities from these texts is challenging due to limited annotated data. While large language models (LLMs) show promise in named entity recognition (NER), specialized prompting strategies are needed to accurately recognize medical scale-related entities, especially in low-resource settings.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>This study aims to develop and evaluate MedScaleNER, a task-oriented prompt framework designed to optimize LLM performance in recognizing medical scale-related entities from Chinese medical literature.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>MedScaleNER incorporates demonstration retrieval within in-context learning, chain-of-thought prompting, and self-verification strategies to improve performance. The framework dynamically retrieves optimal examples using a k-nearest neighbors approach and decomposes the NER task into two subtasks: entity type identification and entity labeling. Self-verification ensures the reliability of the final output. A dataset of manually annotated Chinese medical journal papers was constructed, focusing on three key entity types: scale names, measurement concepts, and measurement items. Experiments were conducted by varying the number of examples and the proportion of training data to evaluate performance in low-resource settings. Additionally, MedScaleNER’s performance was compared with locally fine-tuned models.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>The CMedS-NER (Chinese Medical Scale Corpus for Named Entity Recognition) dataset, containing 720 papers with 27,499 manually annotated scale-related knowledge entities, was used for evaluation. Initial experiments identified GLM-4-0520 as the best-performing LLM among six tested models. When applied with GLM-4-0520, MedScaleNER significantly improved NER performance for scale-related entities, achieving a macro <italic>F</italic><sub>1</sub>-score of 59.64% in an exact string match with the full training dataset. The highest performance was achieved with 20-shot demonstrations. Under low-resource scenarios (eg, 1% of the training data), MedScaleNER outperformed all tested locally fine-tuned models. Ablation studies highlighted the importance of demonstration retrieval and self-verification in improving model reliability. Error analysis revealed four main types of mistakes: identification errors, type errors, boundary errors, and missing entities, indicating areas for further improvement.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>MedScaleNER advances the application of LLMs and prompt engineering for specialized NER tasks in Chinese medical literature. By addressing the challenges of unstructured texts and limited annotated data, MedScaleNER’s adaptability to various biomedical contexts supports more efficient and reliable knowledge extraction, contributing to broader measurement-based care implementation and improved clinical and research outcomes.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>prompt engineering</kwd>
        <kwd>named entity recognition</kwd>
        <kwd>in-context learning</kwd>
        <kwd>large language model</kwd>
        <kwd>Chinese medical literature</kwd>
        <kwd>measurement-based care</kwd>
        <kwd>framework</kwd>
        <kwd>prompt</kwd>
        <kwd>prompt framework</kwd>
        <kwd>scale</kwd>
        <kwd>China</kwd>
        <kwd>medical literature</kwd>
        <kwd>MBC</kwd>
        <kwd>LLM</kwd>
        <kwd>MedScaleNER</kwd>
        <kwd>retrieval</kwd>
        <kwd>information retrieval</kwd>
        <kwd>dataset</kwd>
        <kwd>artificial intelligence</kwd>
        <kwd>AI</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>Measurement-based care (MBC), which involves the systematic use of standardized scales to assess patient conditions, track progress, and inform clinical decisions, has been shown to enhance patient outcomes and optimize health care processes [<xref ref-type="bibr" rid="ref1">1</xref>]. Despite its significant benefits, MBC remains underused worldwide, with less than 20% of health practitioners incorporating it into their routine practice [<xref ref-type="bibr" rid="ref2">2</xref>]. A primary barrier is the lack of accessible and comprehensive knowledge about these scales [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref4">4</xref>]. Clinicians are often unaware of which scales are suitable for specific conditions and lack a clear understanding of the concepts and items within those scales [<xref ref-type="bibr" rid="ref5">5</xref>]. Acquiring this knowledge requires considerable time and effort in searching and reviewing various unstructured documents, such as technical reports, academic papers, and manuals. This not only adds to the workload of already busy medical professionals [<xref ref-type="bibr" rid="ref6">6</xref>] but also hinders the widespread adoption of MBC [<xref ref-type="bibr" rid="ref7">7</xref>]. Transforming unstructured scale-related documents into computable and accessible knowledge systems, such as knowledge graphs, could help alleviate the burden on practitioners and promote MBC adoption [<xref ref-type="bibr" rid="ref8">8</xref>].</p>
      <p>The key to this transformation lies in accurately extracting scale-related knowledge entities from unstructured medical texts. However, several challenges complicate this task. The complexity of medical language, coupled with the variety of scale-related entities, including scale names, measurement concepts, and measurement items, makes accurate extraction difficult [<xref ref-type="bibr" rid="ref1">1</xref>]. For instance, the entity “scale name” may refer solely to the scale itself or include additional details such as its developer, language, version, or population-specific characteristics. Furthermore, extracting knowledge entities from Chinese medical texts introduces additional difficulties due to linguistic variations and the limited availability of annotated data specific to medical scales in Chinese [<xref ref-type="bibr" rid="ref9">9</xref>]. Traditional information extraction methods often depend on extensive data annotation and model fine-tuning, which are resource-intensive and struggle to adapt to new tasks or domains.</p>
      <p>Addressing these challenges requires innovative solutions that can handle the complexity and variability of medical scale information, particularly in Chinese, with limited resources. Large language models (LLMs) such as GPT [<xref ref-type="bibr" rid="ref10">10</xref>], GLM [<xref ref-type="bibr" rid="ref11">11</xref>], LLaMA [<xref ref-type="bibr" rid="ref12">12</xref>], and Qwen [<xref ref-type="bibr" rid="ref13">13</xref>] have recently demonstrated impressive capabilities in natural language understanding and in-context learning (ICL) [<xref ref-type="bibr" rid="ref14">14</xref>], enabling them to perform complex language tasks with minimal fine-tuning and limited annotated data [<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref16">16</xref>]. By leveraging prompt engineering, these models can generate targeted outputs from well-crafted inputs, reducing the need for extensive datasets and making them particularly valuable in low-resource scenarios [<xref ref-type="bibr" rid="ref17">17</xref>-<xref ref-type="bibr" rid="ref19">19</xref>]. Moreover, LLMs have shown the ability to perform tasks in zero-shot and few-shot settings [<xref ref-type="bibr" rid="ref20">20</xref>,<xref ref-type="bibr" rid="ref21">21</xref>], allowing them to generalize from minimal demonstrations (examples) [<xref ref-type="bibr" rid="ref22">22</xref>]. However, their effectiveness in information extraction tasks is highly dependent on the quality of the prompt design, including the task instructions, example selection, and output formats [<xref ref-type="bibr" rid="ref23">23</xref>]. In few-shot settings, selecting the most appropriate examples is crucial for guiding the LLM toward accurate information extraction [<xref ref-type="bibr" rid="ref24">24</xref>]. This selection process is particularly challenging in specialized domains, where the examples should not only be relevant but also representative of the task’s complexity.</p>
      <p>To improve the performance of LLMs in information extraction tasks, researchers have explored techniques such as demonstration retrieval within the ICL paradigm [<xref ref-type="bibr" rid="ref25">25</xref>]. In this approach, relevant examples are dynamically selected from a pool of annotated data based on their similarity to the input text, addressing the issue of sample representativeness [<xref ref-type="bibr" rid="ref26">26</xref>]. Moreover, techniques such as chain-of-thought (CoT) prompting [<xref ref-type="bibr" rid="ref27">27</xref>] and self-verification [<xref ref-type="bibr" rid="ref25">25</xref>] have been used to enhance the accuracy and robustness of domain-specific tasks. CoT prompting involves breaking down complex tasks into subtasks, and guiding the LLM step-by-step through multiturn dialogue [<xref ref-type="bibr" rid="ref27">27</xref>], while self-verification allows the model to review and refine its outputs [<xref ref-type="bibr" rid="ref28">28</xref>]. Despite these promising strategies, most existing research focuses on extracting common medical entities such as diseases, symptoms, drugs, and procedures [<xref ref-type="bibr" rid="ref29">29</xref>], with limited attention to the extraction of scale-related entities.</p>
      <p>Given the importance of task-oriented prompt design and the linguistic complexities of Chinese medical literature, there is an urgent need to develop a specialized framework for medical scale–related knowledge extraction. In this study, we propose MedScaleNER, a task-oriented prompt framework tailored for named entity recognition (NER) of medical scales in Chinese medical literature. MedScaleNER incorporates demonstration retrieval, CoT prompting, and self-verification strategies to tackle the specific challenges associated with extracting scale-related knowledge entities in Chinese. By dynamically selecting representative examples, the framework enhances the generalization capabilities of LLMs and improves extraction performance in few-shot scenarios. CoT prompting decomposes the scale-related NER task into manageable subtasks, easing the cognitive load on LLMs, while self-verification ensures output reliability.</p>
      <p>This study emphasizes the significance of prompt design in LLM-based information extraction, particularly in specialized domains with limited annotated data. By combining demonstration retrieval with advanced prompt strategies, we aim to overcome the challenges posed by data scarcity and the linguistic variations of Chinese medical literature. To facilitate evaluation, we constructed a manually annotated corpus of Chinese medical scales, covering three key entity types of scale names, measurement concepts, and measurement items. We conducted an in-depth assessment of MedScaleNER’s effectiveness on this self-built dataset, examining the impact of the number of demonstrations, the contributions of CoT and self-verification, and the annotated data size required for optimal performance. Our approach contributes to building comprehensive scale knowledge systems, supporting clinicians and researchers in clinical and research efforts, promoting MBC adoption, and ultimately improving patient care.</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Overview</title>
        <p>The workflow of the proposed MedScaleNER prompt framework is illustrated in <xref rid="figure1" ref-type="fig">Figure 1</xref> and consists of three main stages: dataset preparation and annotation, design and implementation of the MedScaleNER framework, and in-depth evaluation and comparison. The process begins with the collection of high-quality Chinese journal papers focused on medical scales. These papers are preprocessed and manually annotated to extract three key types of scale-related entities: scale names, measurement concepts, and measurement items. This manually annotated corpus fills the gap caused by the limited availability of annotated data in this area, while also reflecting the complexities unique to Chinese medical literature.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Workflow of the MedScaleNER prompt framework. LLM: large language model.</p>
          </caption>
          <graphic xlink:href="jmir_v27i1e67033_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>To address the task of medical scale–related NER, we introduce the MedScaleNER framework, which incorporates demonstration retrieval within the ICL paradigm, CoT prompting, and self-verification techniques. The framework selects relevant examples dynamically, breaks down complex NER tasks into manageable subtasks, and improves the reliability of outputs through self-verification. We evaluate MedScaleNER comprehensively, including comparisons of performance with varying numbers of retrieved demonstrations, ablation studies to determine the impact of CoT and self-verification, and assessments of its effectiveness in low-resource scenarios with different training data sizes. Additionally, we benchmark its performance against traditional LLMs fine-tuned locally.</p>
        <p>Formally, the task is defined as follows. Given a collection of Chinese academic documents related to medical scales, denoted as <italic>D</italic>, where each document <italic>D<sub>i</sub></italic> consists of a sequence of sentences <italic>S</italic> = {<italic>s<sub>1</sub></italic>, <italic>s<sub>2</sub></italic>, ..., <italic>s<sub>n</sub></italic>}, and a set of entity types <italic>T</italic> = {scale, concept, item}, the goal of MedScaleNER is to identify all entities <italic>e<sub>i</sub></italic> within <italic>D</italic> and assign the appropriate type <italic>t<sub>i</sub></italic> ∈ <italic>T</italic> to each identified entity.</p>
      </sec>
      <sec>
        <title>Ethical Considerations</title>
        <p>This study used only publicly available published papers from the China National Knowledge Infrastructure, which consist of academic literature and do not contain real patient information. Since the data is publicly accessible and does not involve human participants or private data, ethical approval was not required.</p>
      </sec>
      <sec>
        <title>Dataset Preparation and Annotation</title>
        <p>Due to the lack of annotated datasets for knowledge entity recognition in Chinese medical scales, we constructed a manually annotated corpus from full-text medical journal papers. The annotation focused on three key types of knowledge entities within medical scales: scale name, measurement concept, and measurement item. The scale name refers to the official or widely recognized title of the medical scale used in MBC, such as “The M. D. Anderson Symptom Inventory.” The measurement concept is defined as the broader theoretical or clinical construct that the scale is designed to assess, such as anxiety or cognitive function. The measurement item, on the other hand, refers to the individual questions within the scale that evaluate specific aspects of the measurement concept.</p>
        <p>We began by retrieving abstracts of Chinese core medical journal papers from the China National Knowledge Infrastructure [<xref ref-type="bibr" rid="ref30">30</xref>], which is a Chinese academic journal full-text database, targeting scale development research. The search was conducted using the “Abstract” and “Chinese Library Classification” criteria. From the retrieved papers, we selected the top three subfields within the Chinese Library Classification R code (Medicine and Health) based on literature frequency. Each abstract was manually reviewed to ensure the inclusion of original research papers, and the corresponding full texts were obtained in XML format. A detailed analysis of these full texts revealed that the Methods, Results, and Discussion sections contained a higher density of mentions related to scale names, measurement concepts, and items. Compared to scale names, mentions of concepts and items were less frequent, with items being particularly sparse.</p>
        <p>To improve the balance and density of these entities, we extracted paragraphs specifically from the Methods, Results, and Discussion sections based on their XML structure. We then used key clue words such as “dimension,” “domain,” “variable,” “concept,” “factor,” “item,” and “entry” to identify paragraphs likely to contain the targeted entities. Paragraphs containing these terms were retained for annotation, while others were excluded.</p>
        <p>For data annotation, we used the Label Studio tool [<xref ref-type="bibr" rid="ref31">31</xref>]. Prior to formal annotation, a preannotation phase was conducted to train annotators. During this phase, annotators were introduced to the annotation scheme, guidelines (summarized in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>), and tools. Feedback from this stage was used to refine both the scheme and guidelines through discussions. In the formal annotation phase, each paper was independently annotated by two annotators. A third annotator then checked for consistency, corrected discrepancies based on either annotator’s results, addressed missed annotations, and documented uncertain cases, which were later resolved through group discussions. Cohen κ coefficient was calculated to assess annotation consistency, yielding an overall score of 0.95, indicating a high level of reliability for the constructed dataset [<xref ref-type="bibr" rid="ref32">32</xref>]. Specifically, the type-specific Cohen κ values were 0.961 for scale entities, 0.950 for concepts, and 0.970 for items.</p>
      </sec>
      <sec>
        <title>Design and Implementation of the MedScaleNER Framework</title>
        <sec>
          <title>Overview</title>
          <p>We developed the MedScaleNER prompt framework to identify scale-related entities in medical texts using LLMs. The framework is designed to optimize entity recognition by incorporating three key stages: zero-shot entity type recognition, few-shot entity labeling, and self-verification. To enhance LLM performance, MedScaleNER integrates CoT prompting, which helps guide the model step by step through complex tasks, reducing the cognitive load. This is achieved by first identifying entity types in a zero-shot setting and then labeling the entities with a few examples. To further improve contextual understanding, the framework dynamically retrieves relevant examples using k-nearest neighbors (KNN) and uses self-verification to minimize hallucinations and overprediction, which are common issues in NER tasks [<xref ref-type="bibr" rid="ref33">33</xref>]. <xref rid="figure2" ref-type="fig">Figure 2</xref> outlines the MedScaleNER prompt framework, which consists of four main components: demonstration retrieval, entity type recognition, entity labeling, and self-verification.</p>
          <fig id="figure2" position="float">
            <label>Figure 2</label>
            <caption>
              <p>Design of the MedScaleNER prompt framework. KNN: k-nearest neighbors.</p>
            </caption>
            <graphic xlink:href="jmir_v27i1e67033_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
        </sec>
        <sec>
          <title>Step 1: Zero-Shot Entity Type Recognition</title>
          <p>In the first step, we design a prompt that includes four essential components: task description, entity definitions, output format, and input text [<xref ref-type="bibr" rid="ref25">25</xref>]. Previous research in medical entity recognition using LLMs emphasizes the critical importance of prompt design, especially in specialized tasks [<xref ref-type="bibr" rid="ref34">34</xref>]. Therefore, in this step, the model is framed as a “sophisticated linguist and named entity annotation expert” and tasked with identifying and listing entity types present in the input text without examples. This is the zero-shot stage, where the LLM uses only the provided definitions to perform entity-type recognition without any prior labeled examples.</p>
          <p>To improve the model’s understanding of domain-specific terminology, we provide clear definitions and possible forms for each entity type. This enables the LLM to comprehend and distinguish between different medical scale–related entities. The output is structured as a list of the entity types present in the input text, without repeating a type for multiple occurrences; for instance, if the entity type “scale” appears multiple times in the input, it is represented only once in the output to indicate its presence. The model is explicitly instructed to avoid unnecessary information, limit responses to the provided entity type list, and return “{null}” if no entities are found. By guiding the model through these structured prompts, we leverage CoT prompting to break down the task into manageable steps for better performance (detailed in <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>).</p>
        </sec>
        <sec>
          <title>Step 2: Few-Shot Entity Labeling</title>
          <p>The second step builds upon the output of step 1 by introducing few-shot entity labeling. The prompt in this stage includes the task description, entity definitions, examples (demonstrations), output format, and the input sentence. The role of the LLM remains that of a named entity annotation expert. Now, the task is to label the entities that match the entity types identified in step 1 within the input text.</p>
          <p>Here, we use few-shot prompting because providing a small number of high-quality examples typically boosts performance, especially in specialized tasks like medical NER [<xref ref-type="bibr" rid="ref35">35</xref>]. Few-shot prompting often achieves results comparable to those of fine-tuned models trained on hundreds of samples. However, it is sensitive to the representativeness of the examples, as well as the length of the input. To mitigate these challenges, we use KNN retrieval to dynamically select the most relevant examples from the training corpus. These examples, which are semantically similar to the input text, serve as demonstrations for the LLM to follow, guiding it in accurately labeling entities within the text. Moreover, we incorporate CoT prompting by breaking the task into incremental steps: first identifying entity types (step 1), followed by entity labeling (step 2). Step 1 involves identifying the entity types present in the input text, which informs the candidate pool for KNN retrieval in step 2. For instance, if step 1 determines that the entity types are {scale, concept}, step 2 specifically retrieves examples containing both scale and concept entities. The LLM surrounds the identified entities with appropriate markers in the text [<xref ref-type="bibr" rid="ref36">36</xref>], as illustrated in <xref rid="figure3" ref-type="fig">Figure 3</xref>, with detailed prompts provided in <xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref>.</p>
          <fig id="figure3" position="float">
            <label>Figure 3</label>
            <caption>
              <p>Output format of entity labeling.</p>
            </caption>
            <graphic xlink:href="jmir_v27i1e67033_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
        </sec>
        <sec>
          <title>KNN Demonstration Retrieval</title>
          <p>For demonstration retrieval, we use KNN, a widely used method in ICL [<xref ref-type="bibr" rid="ref37">37</xref>]. The KNN algorithm retrieves semantically similar examples from the annotated dataset to provide the LLM with contextually relevant demonstrations. We begin by generating sentence-level embeddings for both the input texts and candidate examples using the SimCSE [<xref ref-type="bibr" rid="ref38">38</xref>] method implemented in the acge_text_embedding model [<xref ref-type="bibr" rid="ref39">39</xref>]. The cosine similarity between the input text and each candidate example is then computed, and the top k examples with the highest similarity scores are selected.</p>
          <p>To enable efficient retrieval from large-scale, high-dimensional embedding spaces, we use the FAISS library [<xref ref-type="bibr" rid="ref40">40</xref>]. FAISS provides specialized data structures and algorithms for fast indexing and searching of embeddings. After indexing the training embeddings, retrieving demonstrations for a given test sentence simply involves extracting its embedding and performing a semantic similarity search against the indexed embeddings. This approach significantly reduces computational overhead by eliminating exhaustive pairwise comparisons.</p>
          <p>To determine the optimal value of <italic>k</italic>, we conducted experiments with a range of different <italic>k</italic> values. We selected the value that maximized model performance by balancing the need for enough examples to help the LLM generalize while avoiding performance degradation caused by irrelevant or excessive examples. These examples are fed to the LLM to improve generalization and entity labeling.</p>
        </sec>
        <sec>
          <title>Step 3: Few-Shot Self-Verification</title>
          <p>The final step incorporates a self-verification mechanism to improve the accuracy and reliability of the labeled entities. After completing the entity labeling, the LLM undergoes a self-verification process through few-shot prompting, where it reviews its own output for correctness. In this step, the model’s task is to verify whether the labeled entities are accurate by responding with either “Yes” or “No” for each entity. If the answer is “No,” the entity is removed from the output.</p>
          <p>The input for this step includes both the labeled entities and their surrounding context, allowing the LLM to validate its previous output holistically. This self-verification step improves the reliability of the final results, ensuring that the identified and labeled entities meet the expected standards of accuracy (detailed prompts are provided in <xref ref-type="supplementary-material" rid="app4">Multimedia Appendix 4</xref>).</p>
        </sec>
      </sec>
      <sec>
        <title>In-Depth Evaluation and Comparison</title>
        <p>To comprehensively assess the performance of MedScaleNER, we conducted an in-depth analysis. Before the formal experiments, we first identified the best-performing LLM for use in MedScaleNER by comparing various LLMs accessed via the application programming interface (API). Following this, we compared MedScaleNER’s performance with that of locally fine-tuned LLMs on the NER task. Additionally, we carried out ablation studies focusing on the two key components of MedScaleNER: CoT prompting in step 1 (zero-shot entity type recognition) and self-verification in step 3. By isolating these components, we examined their individual contributions to the overall framework, specifically their impact on entity recognition accuracy and output robustness. These studies provided valuable insights into the importance of each step in enhancing model reliability and precision.</p>
        <p>Furthermore, we evaluated MedScaleNER in low-resource settings by varying the amount of training data and the number of demonstrations in the few-shot setting (steps 2 and 3). This analysis was essential for understanding how the framework performs under limited data conditions and testing its scalability and effectiveness when annotation resources are scarce. By experimenting with different proportions of available data and examples, we gained insights into the adaptability of MedScaleNER in resource-constrained scenarios.</p>
        <p>For evaluation, we used precision, recall, and macro <italic>F</italic><sub>1</sub>-score. Precision represents the proportion of correctly predicted entities out of all entities predicted by the model. Recall is the proportion of correctly predicted entities out of all actual entities present in the dataset. Macro <italic>F</italic><sub>1</sub>-score is the harmonic mean of precision and recall, averaged across all entity classes to account for imbalanced class distributions. We determined the correctness of entity recognition using exact string matching, meaning only perfect matches between the model’s predictions and the ground truth were considered correct. This strict evaluation method ensured a high standard for assessing model performance across all comparisons, providing a clear and objective measure of MedScaleNER’s effectiveness.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Summary Statistics of CMedS-NER</title>
        <p>We constructed the CMedS-NER (Chinese Medical Scale Corpus for Named Entity Recognition) dataset specifically for the NER task in the context of Chinese medical scales. The dataset consists of 720 full-text Chinese academic papers focused on medical scales, which include 5582 paragraphs and 22,743 sentences. After conducting a concordance test and making the necessary corrections, CMedS-NER contained a total of 27,499 entity mentions. These consisted of 12,340 mentions of scales, 11,968 mentions of concepts, and 3191 mentions of items. For evaluation purposes, the dataset was randomly split at the document level into 90% for training and 10% for testing. Detailed characteristic statistics of the training and test data are presented in <xref ref-type="table" rid="table1">Table 1</xref>.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Statistics of training and test data.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="160"/>
            <col width="320"/>
            <col width="260"/>
            <col width="230"/>
            <thead>
              <tr valign="top">
                <td colspan="2">Data type</td>
                <td>Training set, n (%)</td>
                <td>Test set, n (%)</td>
                <td>Total, n (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="2">Papers</td>
                <td>648 (90)</td>
                <td>72 (10)</td>
                <td>720 (100)</td>
              </tr>
              <tr valign="top">
                <td colspan="2">Paragraphs</td>
                <td>5055 (90.56)</td>
                <td>527 (9.44)</td>
                <td>5582 (100)</td>
              </tr>
              <tr valign="top">
                <td colspan="2">Sentences</td>
                <td>20,454 (89.94)</td>
                <td>2289 (10.06)</td>
                <td>22,743 (100)</td>
              </tr>
              <tr valign="top">
                <td colspan="5">
                  <bold>Entities</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Scale</td>
                <td>11,106 (90)</td>
                <td>1234 (10)</td>
                <td>12,340 (100)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Concept</td>
                <td>10,836 (90.54)</td>
                <td>1132 (9.46)</td>
                <td>11,968 (100)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Item</td>
                <td>2947 (92.35)</td>
                <td>244 (7.65)</td>
                <td>3191 (100)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>All</td>
                <td>24,889 (90.51)</td>
                <td>2610 (9.49)</td>
                <td>27,499 (100)</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
      <sec>
        <title>LLM Selection and Experimental Setup</title>
        <p>To determine the best-performing LLM for the MedScaleNER framework, we conducted preliminary experiments with six generative LLMs: GPT-3.5-turbo, GLM-4-0520, ERNIE-Bot-turbo, Moonshot-v1-8k, AGI Sky-Chat-3.0, and Qwen-turbo-0624. These models were accessed via APIs and evaluated on randomly selected sentences from the CMedS-NER test set, which included ten scale entities. Among the tested models, GLM-4-0520 performed the best, accurately recognizing nine out of ten scale entities; Qwen-turbo-0624 followed, identifying eight entities (complete results are provided in <xref ref-type="supplementary-material" rid="app5">Multimedia Appendix 5</xref>). Based on this superior performance, GLM-4-0520 was selected for subsequent experiments. For the GLM-4-0520 setup, we used temperature sampling, setting the temperature parameter to 0.02 and the max_tokens parameter to 2048, while leaving all other hyperparameters at their default values.</p>
        <p>In addition to evaluating the API-accessed LLM, we implemented local fine-tuning for four models: GLM-4-9B-Chat [<xref ref-type="bibr" rid="ref41">41</xref>], Qwen2-7B [<xref ref-type="bibr" rid="ref42">42</xref>], BiLSTM-CRF [<xref ref-type="bibr" rid="ref43">43</xref>] (Chinese-BERT-wwm), and W2NER [<xref ref-type="bibr" rid="ref44">44</xref>] (MacBERT). Fine-tuning was performed using Pytorch 1.12.1+cu11.6 on NVIDIA RTX A6000 graphics processing units. For these locally fine-tuned models, hyperparameters were optimized using empirical tuning methods to achieve the best performance on the CMedS-NER dataset. Detailed hyperparameter settings for each model are provided in <xref ref-type="supplementary-material" rid="app6">Multimedia Appendix 6</xref>.</p>
      </sec>
      <sec>
        <title>Performance Comparisons</title>
        <sec>
          <title>Optimal k-Shot Demonstration Selection</title>
          <p>To determine the optimal number of demonstrations (k) for few-shot learning, we tested various k-shot settings (0, 5, 10, 15, 20, and 25) on a randomly selected set of 100 sentences from the test set. As shown in <xref ref-type="table" rid="table2">Table 2</xref>, overall performance improved as the number of demonstrations increased, with the highest <italic>F</italic><sub>1</sub>-score of 81.23% achieved at 20-shot. However, different entity types peaked at different k values. For instance, the <italic>F</italic><sub>1</sub>-score for concepts peaked at 10-shot (85.07%), while the <italic>F</italic><sub>1</sub>-score for scales and items reached their highest performance at 20-shot, with scores of 77.27% and 84.21%, respectively.</p>
          <table-wrap position="float" id="table2">
            <label>Table 2</label>
            <caption>
              <p>Entity extraction performance with different k-shot values.</p>
            </caption>
            <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
              <col width="90"/>
              <col width="200"/>
              <col width="230"/>
              <col width="260"/>
              <col width="220"/>
              <thead>
                <tr valign="top">
                  <td>k-shot</td>
                  <td>All, <italic>F</italic><sub>1</sub>-score (%)</td>
                  <td>Scale, <italic>F</italic><sub>1</sub>-score (%)</td>
                  <td>Concept, <italic>F</italic><sub>1</sub>-score (%)</td>
                  <td>Item, <italic>F</italic><sub>1</sub>-score (%)</td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td>0</td>
                  <td>17.11</td>
                  <td>23.26</td>
                  <td>28.07</td>
                  <td>0</td>
                </tr>
                <tr valign="top">
                  <td>5</td>
                  <td>68.74</td>
                  <td>61.87</td>
                  <td>74.34</td>
                  <td>70</td>
                </tr>
                <tr valign="top">
                  <td>10</td>
                  <td>67.59</td>
                  <td>67.69</td>
                  <td>
                    <italic>85.07</italic>
                    <sup>a</sup>
                  </td>
                  <td>50</td>
                </tr>
                <tr valign="top">
                  <td>15</td>
                  <td>78.55</td>
                  <td>71.88</td>
                  <td>83.78</td>
                  <td>80</td>
                </tr>
                <tr valign="top">
                  <td>20</td>
                  <td>
                    <italic>81.23</italic>
                  </td>
                  <td>
                    <italic>77.27</italic>
                  </td>
                  <td>82.19</td>
                  <td>
                    <italic>84.21</italic>
                  </td>
                </tr>
                <tr valign="top">
                  <td>25</td>
                  <td>71.06</td>
                  <td>75.19</td>
                  <td>78.73</td>
                  <td>59.26</td>
                </tr>
              </tbody>
            </table>
            <table-wrap-foot>
              <fn id="table2fn1">
                <p><sup>a</sup>The best performance is italicized.</p>
              </fn>
            </table-wrap-foot>
          </table-wrap>
        </sec>
        <sec>
          <title>Low-Resource Comparison</title>
          <p>To evaluate MedScaleNER’s performance in low-resource scenarios, we trained the model using different proportions of the training data (1%, 5%, 10%, 50%, and 100%) and assessed its performance on the test set. As presented in <xref rid="figure4" ref-type="fig">Figure 4</xref>A, the overall <italic>F</italic><sub>1</sub>-score increased as more training data was used. Notably, significant performance gains were observed when increasing the training data from 1% (205 sentences) to 5% (1023 sentences), with the overall <italic>F</italic><sub>1</sub>-score rising from 48.22% to 58.29%, precision improving from 52.33% to 57.40%, and recall jumping from 45.35% to 60.06%. Beyond this point, improvements plateaued, with only a 1.35% increase in the <italic>F</italic><sub>1</sub>-score between 5% and 100% of the training data (from 58.29% to 59.64%). A similar trend was observed for precision and recall, although precision dropped slightly at 10% of the training data. When examining scale and concept entities (<xref rid="figure4" ref-type="fig">Figure 4</xref>B and C), the same pattern emerged: a significant improvement from 1% to 5% of the training data, followed by minimal gains from 5% to 100%. However, for item entities (<xref rid="figure4" ref-type="fig">Figure 4</xref>D), precision, recall, and <italic>F</italic><sub>1</sub>-scores slightly declined as the training data increased from 5% to 100%.</p>
          <fig id="figure4" position="float">
            <label>Figure 4</label>
            <caption>
              <p>Low-resource performance of MedScaleNER: precision (P), recall (R), and macro <italic>F</italic><sub>1</sub>-scores (<italic>F</italic><sub>1</sub>) across different proportions of training data for (A) all entities, (B) scales, (C) concepts, and (D) items.</p>
            </caption>
            <graphic xlink:href="jmir_v27i1e67033_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
        </sec>
        <sec>
          <title>Ablation Studies</title>
          <p>To evaluate the contributions of different components within the MedScaleNER framework, we conducted ablation studies to examine the impact of CoT prompting and self-verification under various low-resource settings. We tested the GLM-4-0520’s performance with and without these components using different proportions of the training data (1%, 5%, 10%, 50%, and 100%). The baseline model involved directly prompting the task description and labeling entities in the input using 20 examples.</p>
          <p>As shown in <xref ref-type="table" rid="table3">Table 3</xref>, incorporating self-verification consistently improved <italic>F</italic><sub>1</sub>-scores across all training data sizes. With 100% of the training data, self-verification led to a 0.89% increase in the <italic>F</italic><sub>1</sub>-score, while breaking down the task into subtasks using CoT resulted in a 2.11% increase. However, in extremely low-resource scenarios (eg, 1% of the training data), adding CoT without self-verification did not enhance performance and slightly decreased the <italic>F</italic><sub>1</sub>-score. The combination of both CoT and self-verification yielded the best performance across all training data sizes. In low-resource settings, self-verification had a significant impact. It increased the <italic>F</italic><sub>1</sub>-score of MedScaleNER by 7.92% with 1% of the training data and by 3.27% with 5% of the training data, compared to configurations without it (ie, baseline + CoT).</p>
          <table-wrap position="float" id="table3">
            <label>Table 3</label>
            <caption>
              <p>Ablation study results: P<sup>a</sup>, R<sup>b</sup>, and <italic>F</italic><sub>1</sub><sup>c</sup> scores for different components of MedScaleNER across different proportions of training data.</p>
            </caption>
            <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
              <col width="110"/>
              <col width="70"/>
              <col width="60"/>
              <col width="100"/>
              <col width="0"/>
              <col width="70"/>
              <col width="60"/>
              <col width="100"/>
              <col width="0"/>
              <col width="70"/>
              <col width="60"/>
              <col width="100"/>
              <col width="0"/>
              <col width="70"/>
              <col width="70"/>
              <col width="60"/>
              <thead>
                <tr valign="bottom">
                  <td>Proportion of training data (n of sentences)</td>
                  <td colspan="4">Baseline</td>
                  <td colspan="4">Baseline + self-verification</td>
                  <td colspan="4">Baseline + CoT<sup>d</sup></td>
                  <td colspan="3">MedScaleNER</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>P (%)</td>
                  <td>R (%)</td>
                  <td><italic>F</italic><sub>1</sub> (%)</td>
                  <td colspan="2">P (%)</td>
                  <td>R (%)</td>
                  <td><italic>F</italic><sub>1</sub> (%)</td>
                  <td colspan="2">P (%)</td>
                  <td>R (%)</td>
                  <td><italic>F</italic><sub>1</sub> (%)</td>
                  <td colspan="2">P (%)</td>
                  <td>R (%)</td>
                  <td><italic>F</italic><sub>1</sub> (%)</td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td>1% (205)</td>
                  <td>48.21</td>
                  <td>45.70</td>
                  <td>46.63</td>
                  <td colspan="2">51.42</td>
                  <td>45.59</td>
                  <td>48.14</td>
                  <td colspan="2">36.52</td>
                  <td>45.46</td>
                  <td>40.30</td>
                  <td colspan="2">52.33</td>
                  <td>45.35</td>
                  <td>48.22</td>
                </tr>
                <tr valign="top">
                  <td>5% (1023)</td>
                  <td>44.21</td>
                  <td>66.41</td>
                  <td>52.35</td>
                  <td colspan="2">47.61</td>
                  <td>66.71</td>
                  <td>55.07</td>
                  <td colspan="2">51.10</td>
                  <td>60.20</td>
                  <td>55.02</td>
                  <td colspan="2">57.40</td>
                  <td>60.06</td>
                  <td>58.29</td>
                </tr>
                <tr valign="top">
                  <td>10% (2045)</td>
                  <td>45.88</td>
                  <td>66.11</td>
                  <td>54.37</td>
                  <td colspan="2">47.99</td>
                  <td>65.54</td>
                  <td>55.03</td>
                  <td colspan="2">50.84</td>
                  <td>60.95</td>
                  <td>55.13</td>
                  <td colspan="2">56.22</td>
                  <td>60.84</td>
                  <td>58.11</td>
                </tr>
                <tr valign="top">
                  <td>50% (10,227)</td>
                  <td>45.12</td>
                  <td>68.08</td>
                  <td>54.32</td>
                  <td colspan="2">48.86</td>
                  <td>68.41</td>
                  <td>56.76</td>
                  <td colspan="2">52.49</td>
                  <td>62.16</td>
                  <td>56.69</td>
                  <td colspan="2">57.41</td>
                  <td>61.83</td>
                  <td>59.23</td>
                </tr>
                <tr valign="top">
                  <td>100% (20,454)</td>
                  <td>45.21</td>
                  <td>69.50</td>
                  <td>54.78</td>
                  <td colspan="2">47.79</td>
                  <td>69.31</td>
                  <td>55.67</td>
                  <td colspan="2">53.11</td>
                  <td>61.73</td>
                  <td>56.89</td>
                  <td colspan="2">58.36</td>
                  <td>61.59</td>
                  <td>59.64</td>
                </tr>
              </tbody>
            </table>
            <table-wrap-foot>
              <fn id="table3fn1">
                <p><sup>a</sup>P: precision.</p>
              </fn>
              <fn id="table3fn2">
                <p><sup>b</sup>R: recall.</p>
              </fn>
              <fn id="table3fn3">
                <p><sup>c</sup><italic>F</italic><sub>1</sub>: macro <italic>F</italic><sub>1</sub>-score.</p>
              </fn>
              <fn id="table3fn4">
                <p><sup>d</sup>CoT: chain-of-thought.</p>
              </fn>
            </table-wrap-foot>
          </table-wrap>
        </sec>
        <sec>
          <title>Comparison With Local Fine-Tuned Models</title>
          <p>We compared MedScaleNER with several locally fine-tuned models on the CMedS-NER dataset, including GLM-4-9B-Chat, Qwen2-7B, BiLSTM-CRF (Chinese-BERT-wwm), and W2NER (MacBERT). Both GLM-4-9B-Chat and Qwen2-7B were fine-tuned using the low-rank adaptation method with a parameter-efficient fine-tuning strategy and identical fine-tuning parameters, ensuring a fair comparison. After fine-tuning, we prompted the fine-tuned GLM-4-9B-Chat and Qwen2-7B for the NER task using a prompt structure similar to that of our baseline, but without KNN retrieval.</p>
          <p>As shown in <xref rid="figure5" ref-type="fig">Figure 5</xref>, MedScaleNER achieved an overall <italic>F</italic><sub>1</sub>-score of 59.64%, which is lower than the fine-tuned Qwen2-7B (79.91%), GLM-4-9B-Chat (80.34%), BiLSTM-CRF (80.99%), and W2NER (81.38%). Notably, under low-resource scenarios (eg, using only 1% of the training data), MedScaleNER significantly outperformed the other fine-tuned models. At 5% of the training data, while MedScaleNER’s <italic>F</italic><sub>1</sub>-score was lower than W2NER and BiLSTM-CRF, it remained substantially higher than Qwen2-7B and stayed competitive with GLM-4-9B-Chat.</p>
          <fig id="figure5" position="float">
            <label>Figure 5</label>
            <caption>
              <p><italic>F</italic><sub>1</sub>-score comparison of MedScaleNER and locally fine-tuned models across different proportions of training data.</p>
            </caption>
            <graphic xlink:href="jmir_v27i1e67033_fig5.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
        </sec>
      </sec>
      <sec>
        <title>Error Analysis</title>
        <p>We conducted an error analysis by manually reviewing 300 randomly selected sentences from the model outputs to identify common types of mistakes and areas for improvement. The errors were classified into four main types: (1) identification errors, where nonentity terms were incorrectly identified as entities; (2) type errors, where entities were correctly identified but assigned the wrong entity type; (3) boundary errors, which involved incorrect determination of the start and end positions of entities; and (4) missing entities, where entities present in the text were not identified by the model.</p>
        <p><xref rid="figure6" ref-type="fig">Figure 6</xref> provides examples of each error type, illustrating the nature of these mistakes. Identification errors were the most common and often resulted from ambiguous entity definitions. For example, generic terms like “item” or “scale” were sometimes misinterpreted as specific entities due to their inclusion in prompt definitions. Type errors occurred when entities were recognized but misclassified. For instance, “overall evaluation of the quality of nursing services” was mistakenly labeled as a concept rather than an item. Boundary errors included incorrect inclusion or exclusion of surrounding text or punctuation, such as parentheses or modifiers that should not be part of the entity span. Finally, missing entities were frequently associated with English names or abbreviations of scales and items, especially in cases involving long or complex strings.</p>
        <fig id="figure6" position="float">
          <label>Figure 6</label>
          <caption>
            <p>Examples of the four main error types identified in MedScaleNER: identification errors, type errors, boundary errors, and missing entities.</p>
          </caption>
          <graphic xlink:href="jmir_v27i1e67033_fig6.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>This study is among the first to explore the use of LLMs and prompt engineering for NER tasks related to Chinese medical scales. We proposed a novel prompt framework, MedScaleNER, which enhances the adaptive learning capabilities of LLMs by dynamically retrieving optimal examples through KNN retrieval. By using a CoT strategy, the framework decomposes the complex task of entity recognition into two sequential steps: first, identifying entity types and then labeling entities. This approach strengthens the logical reasoning ability of LLMs. Additionally, incorporating self-verification mechanisms ensures the accuracy of the final recognition results, improving the reliability of the model’s outputs.</p>
        <p>Our evaluation of the self-constructed CMedS-NER dataset demonstrated that MedScaleNER effectively recognizes medical scale–related entities. The dataset, comprising 720 full-text Chinese academic papers with 27,499 annotated entities, is a high-quality resource for training and evaluating NER models in this specialized domain. Notably, in low-resource settings with as few as 205 sentences, MedScaleNER outperformed locally fine-tuned models such as BiLSTM-CRF (Chinese-BERT-wwm), W2NER (MacBERT), GLM-4-9B-Chat, and Qwen2-7B. When more annotated data became available (eg, 1023 sentences), MedScaleNER remained competitive. This low-resource performance is particularly significant in biomedical and clinical contexts, where domain-specific annotations are often expensive and time-consuming to produce.</p>
        <p>Ablation studies further highlighted that KNN retrieval significantly improved performance in low-resource settings, aligning with previous findings [<xref ref-type="bibr" rid="ref37">37</xref>] on the benefits of such strategies in ICL. Integrating CoT prompting and self-verification with KNN retrieval boosted the <italic>F</italic><sub>1</sub>-score by approximately 6% when using 5% of the training data. This suggests that while retrieving representative examples is crucial, the structured CoT and self-verification steps are also important, contributing to more accurate and robust entity annotation than retrieval-based demonstration alone.</p>
        <p>Although using high-quality demonstrations improved the LLM’s ability to recognize scale-related entities [<xref ref-type="bibr" rid="ref25">25</xref>,<xref ref-type="bibr" rid="ref45">45</xref>], performance declined when the number of examples exceeded an optimal threshold. Context length limitations, example ordering [<xref ref-type="bibr" rid="ref46">46</xref>], and entity-type specific sensitivities influenced this trade-off. For instance, concept entities benefited from fewer examples compared to scale and item entities. This suggests that tailoring demonstration strategies by entity type could maximize performance.</p>
        <p>Compared to traditional and fine-tuned NER methods designed for similar biomedical contexts, MedScaleNER offers several advantages. Conventional approaches often require extensive domain adaptation, large annotated corpora, or multiple rounds of fine-tuning to achieve competitive results [<xref ref-type="bibr" rid="ref47">47</xref>,<xref ref-type="bibr" rid="ref48">48</xref>]. In contrast, MedScaleNER excels under low-resource settings by leveraging KNN retrieval, CoT, and self-verification. Its flexible, task-oriented design allows simple modification of entity definitions to adapt to new domains, other languages, or even other LLM backbones. This adaptability supports broader generalizability, enabling MedScaleNER to scale beyond Chinese medical scales to other medical domains and even entirely different biomedical NER tasks.</p>
        <p>Moreover, moving toward a human-centered medical scale NER workflow is crucial [<xref ref-type="bibr" rid="ref23">23</xref>,<xref ref-type="bibr" rid="ref49">49</xref>]. Allowing domain experts to provide feedback, customize prompt components, control retrieval parameters, and determine when to use self-verification can improve transparency, trust, and overall user satisfaction [<xref ref-type="bibr" rid="ref50">50</xref>,<xref ref-type="bibr" rid="ref51">51</xref>]. Such a human-in-the-loop approach ensures that MedScaleNER remains aligned with real-world clinical and research priorities, particularly important in dynamic health care environments.</p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>Despite these strengths, there are limitations to this study. First, we primarily focused on the GLM-4 model, and future work should evaluate additional LLMs [<xref ref-type="bibr" rid="ref52">52</xref>] such as LLaMA, Mistral, GPT, and PaLM, to validate generalizability. Second, our example retrieval strategy relied on KNN based on sentence similarity. Alternative retrieval strategies [<xref ref-type="bibr" rid="ref53">53</xref>] and more advanced similarity models may further enhance performance. Third, while we focused on three main scale-related entity types—scale names, measurement concepts, and measurement items—future research could extend this framework to other entities, such as functions, targets, and validity measures. Finally, integrating LLMs with traditional NER models could leverage the complementary strengths of both approaches, potentially resulting in more robust and accurate entity recognition systems.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>In this study, we introduced MedScaleNER, a task-oriented prompt framework that integrates demonstration retrieval, CoT prompting, and self-verification strategies to enhance the recognition of medical scale-related entities in Chinese medical literature. Evaluated on our self-constructed CMedS-NER dataset, MedScaleNER demonstrates robust performance even with limited annotated data. By allowing simple adjustments to prompt definitions, MedScaleNER readily adapts to diverse biomedical domains, languages, and entity types, making it a resource-efficient solution for broader information extraction challenges. This adaptability supports more efficient and reliable knowledge extraction, ultimately contributing to better clinical and research outcomes in MBC. By continuing to refine and expand MedScaleNER, we aim to advance automated knowledge extraction systems and promote the widespread adoption of MBC in health care.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Annotation scheme and guidelines.</p>
        <media xlink:href="jmir_v27i1e67033_app1.docx" xlink:title="DOCX File , 17 KB"/>
      </supplementary-material>
      <supplementary-material id="app2">
        <label>Multimedia Appendix 2</label>
        <p>Prompts for step 1.</p>
        <media xlink:href="jmir_v27i1e67033_app2.docx" xlink:title="DOCX File , 18 KB"/>
      </supplementary-material>
      <supplementary-material id="app3">
        <label>Multimedia Appendix 3</label>
        <p>Prompts for step 2.</p>
        <media xlink:href="jmir_v27i1e67033_app3.docx" xlink:title="DOCX File , 17 KB"/>
      </supplementary-material>
      <supplementary-material id="app4">
        <label>Multimedia Appendix 4</label>
        <p>Prompts for step 3.</p>
        <media xlink:href="jmir_v27i1e67033_app4.docx" xlink:title="DOCX File , 18 KB"/>
      </supplementary-material>
      <supplementary-material id="app5">
        <label>Multimedia Appendix 5</label>
        <p>Preliminary experimental results for optimal model selection.</p>
        <media xlink:href="jmir_v27i1e67033_app5.docx" xlink:title="DOCX File , 17 KB"/>
      </supplementary-material>
      <supplementary-material id="app6">
        <label>Multimedia Appendix 6</label>
        <p>Hyperparameters of locally fine-tuned models.</p>
        <media xlink:href="jmir_v27i1e67033_app6.docx" xlink:title="DOCX File , 16 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">API</term>
          <def>
            <p>application programming interface</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">CMedS-NER</term>
          <def>
            <p>Chinese Medical Scale Corpus for Named Entity Recognition</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">CoT</term>
          <def>
            <p>chain-of-thought</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">ICL</term>
          <def>
            <p>in-context learning</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">KNN</term>
          <def>
            <p>k-nearest neighbor</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">LLM</term>
          <def>
            <p>large language model</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">MBC</term>
          <def>
            <p>measurement-based care</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">NER</term>
          <def>
            <p>named entity recognition</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>The authors would like to thank our colleagues Zhen Guo and Liu Shen. We are grateful to Zhen Guo for his help with the deployment of the labeling tool, and to Liu Shen for her cooperation in the acquisition of raw data. This work was supported by the National Social Science Fund of China (grant 21BTQ069), the Chinese Academy of Medical Sciences Innovation Fund for Medical Sciences (grants 2021-I2M-1-056 and 2021-I2M-1-057), and the National Key Research and Development Program of China (grant 2022YFC3601005).</p>
    </ack>
    <notes>
      <sec>
        <title>Data Availability</title>
        <p>The sample data and the source code are available on GitHub [<xref ref-type="bibr" rid="ref54">54</xref>].</p>
      </sec>
    </notes>
    <fn-group>
      <fn fn-type="con">
        <p>HS, JH, and ZC contributed to the concept and design of the study. HS, LZ, ZC, WZ, and Junlian L prepared, annotated, and curated the dataset. JH, HS, Jiao L, and QQ developed the methodology. JH, ZC, QP, and SC performed the experiments. HS, JH, ZC, and LZ interpreted the results. JH, HS, and ZC drafted the manuscript. LZ, Junlian L, Jiao L, and QQ revised the manuscript. All authors approved the final version of the manuscript.</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>DeVellis</surname>
              <given-names>RF</given-names>
            </name>
          </person-group>
          <source>Scale Development: Theory and Applications</source>
          <year>1991</year>
          <publisher-loc>Thousand Oaks, CA</publisher-loc>
          <publisher-name>Sage Publications, Inc</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lewis</surname>
              <given-names>CC</given-names>
            </name>
            <name name-style="western">
              <surname>Boyd</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Puspitasari</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Navarro</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Howard</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Kassab</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Hoffman</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Scott</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Lyon</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Douglas</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Simon</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Kroenke</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Implementing measurement-based care in behavioral health: a review</article-title>
          <source>JAMA Psychiatry</source>
          <year>2019</year>
          <volume>76</volume>
          <issue>3</issue>
          <fpage>324</fpage>
          <lpage>335</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/30566197"/>
          </comment>
          <pub-id pub-id-type="doi">10.1001/jamapsychiatry.2018.3329</pub-id>
          <pub-id pub-id-type="medline">30566197</pub-id>
          <pub-id pub-id-type="pii">2718629</pub-id>
          <pub-id pub-id-type="pmcid">PMC6584602</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Murphy</surname>
              <given-names>JK</given-names>
            </name>
            <name name-style="western">
              <surname>Michalak</surname>
              <given-names>EE</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Colquhoun</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Burton</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Fei</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>He</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Su</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Fang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Lam</surname>
              <given-names>RW</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Barriers and facilitators to implementing measurement-based care for depression in Shanghai, China: a situational analysis</article-title>
          <source>BMC Psychiatry</source>
          <year>2021</year>
          <volume>21</volume>
          <issue>1</issue>
          <fpage>430</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcpsychiatry.biomedcentral.com/articles/10.1186/s12888-021-03442-5"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12888-021-03442-5</pub-id>
          <pub-id pub-id-type="medline">34470624</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12888-021-03442-5</pub-id>
          <pub-id pub-id-type="pmcid">PMC8411506</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jensen-Doss</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Haimes</surname>
              <given-names>EMB</given-names>
            </name>
            <name name-style="western">
              <surname>Smith</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Lyon</surname>
              <given-names>AR</given-names>
            </name>
            <name name-style="western">
              <surname>Lewis</surname>
              <given-names>CC</given-names>
            </name>
            <name name-style="western">
              <surname>Stanick</surname>
              <given-names>CF</given-names>
            </name>
            <name name-style="western">
              <surname>Hawley</surname>
              <given-names>KM</given-names>
            </name>
          </person-group>
          <article-title>Monitoring treatment progress and providing feedback is viewed favorably but rarely used in practice</article-title>
          <source>Adm Policy Ment Health</source>
          <year>2018</year>
          <month>01</month>
          <volume>45</volume>
          <issue>1</issue>
          <fpage>48</fpage>
          <lpage>61</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/27631610"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s10488-016-0763-0</pub-id>
          <pub-id pub-id-type="medline">27631610</pub-id>
          <pub-id pub-id-type="pii">10.1007/s10488-016-0763-0</pub-id>
          <pub-id pub-id-type="pmcid">PMC5495625</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Shoman</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Majery</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Otelea</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Lambreghts</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Guseva Canu</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>How to identify the most suitable questionnaires and rating scales for your clinical practice or research?</article-title>
          <source>Int J Clin Pract</source>
          <year>2021</year>
          <month>12</month>
          <volume>75</volume>
          <issue>12</issue>
          <fpage>e14895</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/34541756"/>
          </comment>
          <pub-id pub-id-type="doi">10.1111/ijcp.14895</pub-id>
          <pub-id pub-id-type="medline">34541756</pub-id>
          <pub-id pub-id-type="pmcid">PMC9285901</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jensen-Doss</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Hawley</surname>
              <given-names>KM</given-names>
            </name>
          </person-group>
          <article-title>Understanding clinicians' diagnostic practices: attitudes toward the utility of diagnosis and standardized diagnostic tools</article-title>
          <source>Adm Policy Ment Health</source>
          <year>2011</year>
          <month>11</month>
          <volume>38</volume>
          <issue>6</issue>
          <fpage>476</fpage>
          <lpage>85</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/21279679"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s10488-011-0334-3</pub-id>
          <pub-id pub-id-type="medline">21279679</pub-id>
          <pub-id pub-id-type="pmcid">PMC6114089</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cheung</surname>
              <given-names>BS</given-names>
            </name>
            <name name-style="western">
              <surname>Murphy</surname>
              <given-names>JK</given-names>
            </name>
            <name name-style="western">
              <surname>Michalak</surname>
              <given-names>EE</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Lam</surname>
              <given-names>RW</given-names>
            </name>
          </person-group>
          <article-title>Barriers and facilitators to technology-enhanced measurement based care for depression among Canadian clinicians and patients: results of an online survey</article-title>
          <source>J Affect Disord</source>
          <year>2023</year>
          <month>01</month>
          <day>01</day>
          <volume>320</volume>
          <fpage>1</fpage>
          <lpage>6</lpage>
          <pub-id pub-id-type="doi">10.1016/j.jad.2022.09.055</pub-id>
          <pub-id pub-id-type="medline">36162664</pub-id>
          <pub-id pub-id-type="pii">S0165-0327(22)01058-8</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Hao</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Guo</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Shen</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Construction of a fine-grained knowledge element-based framework for knowledge representation in medical scale documents</article-title>
          <source>Digital Lib Forum</source>
          <year>2023</year>
          <volume>19</volume>
          <issue>12</issue>
          <fpage>86</fpage>
          <lpage>98</lpage>
          <pub-id pub-id-type="doi">10.3772/j.issn.1673-2286.2023.12.009</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wenqi</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Pu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Yule</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Liang</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Xiaoyan</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Named entity recognition from Chinese medical literature based on deep learning method</article-title>
          <year>2023</year>
          <conf-name>China Automation Congress (CAC)</conf-name>
          <conf-date>November 19, 2023</conf-date>
          <conf-loc>Chongqing, China</conf-loc>
          <pub-id pub-id-type="doi">10.1109/CAC59555.2023.10451997</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yenduri</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Ramalingam</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Selvi</surname>
              <given-names>GC</given-names>
            </name>
            <name name-style="western">
              <surname>Supriya</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Srivastava</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Maddikunta</surname>
              <given-names>PRK</given-names>
            </name>
            <name name-style="western">
              <surname>Raj</surname>
              <given-names>GD</given-names>
            </name>
            <name name-style="western">
              <surname>Jhaveri</surname>
              <given-names>RH</given-names>
            </name>
            <name name-style="western">
              <surname>Prabadevi</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Vasilakos</surname>
              <given-names>AV</given-names>
            </name>
            <name name-style="western">
              <surname>Gadekallu</surname>
              <given-names>TR</given-names>
            </name>
          </person-group>
          <article-title>Generative pre-trained transformer: a comprehensive review on enabling technologies, potential applications, emerging challenges, and future directions</article-title>
          <source>ArXiv. Preprint posted online on May 11, 2023</source>
          <year>2023</year>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2305.10435v2"/>
          </comment>
          <pub-id pub-id-type="doi">10.1109/access.2024.3389497</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Du</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Qian</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Ding</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Qiu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Tang</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>GLM: general language model pretraining with autoregressive blank infilling</article-title>
          <source>ArXiv. Preprint posted online on March 18, 2021</source>
          <year>2021</year>
          <pub-id pub-id-type="doi">10.48550/arXiv.2103.10360</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Touvron</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Lavril</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Izacard</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Martinet</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Lachaux</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Lacroix</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Rozière</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Goyal</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Hambro</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Azhar</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Rodriguez</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Joulin</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Grave</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Lample</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>LLaMA: open and efficient foundation language models</article-title>
          <source>ArXiv. Preprint posted online on February 27, 2023</source>
          <year>2023</year>
          <pub-id pub-id-type="doi">10.48550/arXiv.2302.13971</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bai</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Bai</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Chu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Cui</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Dang</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Deng</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Fan</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Ge</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Han</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Hui</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Ji</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>MM</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Qwen technical report</article-title>
          <source>ArXiv. Preprint posted online on September 28, 2023</source>
          <year>2023</year>
          <pub-id pub-id-type="doi">10.48550/arXiv.2309.16609</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rosenbloom</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>arXiv</article-title>
          <source>Charleston Adv</source>
          <year>2019</year>
          <month>10</month>
          <day>01</day>
          <volume>21</volume>
          <issue>2</issue>
          <fpage>8</fpage>
          <lpage>10</lpage>
          <pub-id pub-id-type="doi">10.5260/chara.21.2.8</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dagdelen</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Dunn</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Walker</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Rosen</surname>
              <given-names>AS</given-names>
            </name>
            <name name-style="western">
              <surname>Ceder</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Persson</surname>
              <given-names>KA</given-names>
            </name>
            <name name-style="western">
              <surname>Jain</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Structured information extraction from scientific text with large language models</article-title>
          <source>Nat Commun</source>
          <year>2024</year>
          <month>02</month>
          <day>15</day>
          <volume>15</volume>
          <issue>1</issue>
          <fpage>1418</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41467-024-45563-x"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41467-024-45563-x</pub-id>
          <pub-id pub-id-type="medline">38360817</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41467-024-45563-x</pub-id>
          <pub-id pub-id-type="pmcid">PMC10869356</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Du</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Peng</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Keloth</surname>
              <given-names>VK</given-names>
            </name>
            <name name-style="western">
              <surname>Zuo</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Roberts</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Improving large language models for clinical named entity recognition via prompt engineering</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2024</year>
          <month>09</month>
          <day>01</day>
          <volume>31</volume>
          <issue>9</issue>
          <fpage>1812</fpage>
          <lpage>1820</lpage>
          <pub-id pub-id-type="doi">10.1093/jamia/ocad259</pub-id>
          <pub-id pub-id-type="medline">38281112</pub-id>
          <pub-id pub-id-type="pii">7590607</pub-id>
          <pub-id pub-id-type="pmcid">PMC11339492</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Brown</surname>
              <given-names>TB</given-names>
            </name>
            <name name-style="western">
              <surname>Mann</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Ryder</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Subbiah</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Kaplan</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Dhariwal</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Neelakantan</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Shyam</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Sastry</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Askell</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Agarwal</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Herbert-Voss</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Krueger</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Henighan</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Child</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Language models are few-shot learners</article-title>
          <source>ArXiv. Preprint posted online on May 28, 2020</source>
          <year>2020</year>
          <pub-id pub-id-type="doi">10.48550/arXiv.2005.14165</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chowdhery</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Narang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Devlin</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Bosma</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Mishra</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Roberts</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Barham</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Chung</surname>
              <given-names>HW</given-names>
            </name>
            <name name-style="western">
              <surname>Sutton</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Gehrmann</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Schuh</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Shi</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Tsvyashchenko</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Maynez</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Rao</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Barnes</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Tay</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Shazeer</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Prabhakaran</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Reif</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>PaLM: scaling language modeling with pathways</article-title>
          <source>ArXiv. Preprint posted online on April 05, 2022</source>
          <year>2022</year>
          <pub-id pub-id-type="doi">10.48550/arXiv.2204.02311</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Shi</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Le</surname>
              <given-names>X</given-names>
            </name>
          </person-group>
          <article-title>Material information extraction based on local large language model and prompt engineering</article-title>
          <source>Data Anal Knowl Discovery</source>
          <year>2024</year>
          <volume>8</volume>
          <issue>7</issue>
          <fpage>23</fpage>
          <lpage>31</lpage>
          <pub-id pub-id-type="doi">10.11925/infotech.2096-3467.2023.1119</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jie</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Zhiqiang</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Haixia</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Zhenli</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Jiao</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>ChatGPT and zero-shot prompt-based structured information extraction for clinical scale items</article-title>
          <source>Lib Inf Serv</source>
          <year>2024</year>
          <volume>68</volume>
          <issue>22</issue>
          <fpage>139</fpage>
          <lpage>152</lpage>
          <pub-id pub-id-type="doi">10.12074/202408.00017V1</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fan</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Ding</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Ning</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Yin</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Chua</surname>
              <given-names>TS</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Q</given-names>
            </name>
          </person-group>
          <article-title>A survey on rag meeting LLMs: towards retrieval-augmented large language models</article-title>
          <year>2024</year>
          <conf-name>KDD '24: The 30th ACM SIGKDD Conference on Knowledge Discovery and Data Mining</conf-name>
          <conf-date>August 25-29, 2024</conf-date>
          <conf-loc>Barcelona, Spain</conf-loc>
          <fpage>6491</fpage>
          <lpage>6501</lpage>
          <pub-id pub-id-type="doi">10.1145/3637528.3671470</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wei</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Cui</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Cheng</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Xie</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Han</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>ChatIE: zero-shot information extraction via chatting with ChatGPT</article-title>
          <source>ArXiv. Preprint posted online on February 20, 2023</source>
          <year>2023</year>
          <pub-id pub-id-type="doi">10.48550/arXiv.2302.10205</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zaghir</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Naguib</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Bjelogrlic</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Névéol</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Tannier</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Lovis</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Prompt engineering paradigms for medical applications: scoping review</article-title>
          <source>J Med Internet Res</source>
          <year>2024</year>
          <volume>26</volume>
          <fpage>e60501</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2024/1/e60501/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/60501</pub-id>
          <pub-id pub-id-type="medline">39255030</pub-id>
          <pub-id pub-id-type="pii">v26i1e60501</pub-id>
          <pub-id pub-id-type="pmcid">PMC11422740</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ge</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Guo</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>YC</given-names>
            </name>
            <name name-style="western">
              <surname>Al-Garadi</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Sarker</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>A comparison of few-shot and traditional named entity recognition models for medical text</article-title>
          <year>2022</year>
          <conf-name>2022 IEEE 10th International Conference on Healthcare Informatics (ICHI)</conf-name>
          <conf-date>June 11-14, 2022</conf-date>
          <conf-loc>Rochester, MN</conf-loc>
          <fpage>84</fpage>
          <lpage>89</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/37641590"/>
          </comment>
          <pub-id pub-id-type="doi">10.1109/ichi54592.2022.00024</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Ouyang</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>GPT-NER: named entity recognition via large language models</article-title>
          <source>ArXiv. Preprint posted online on April 20, 2023</source>
          <year>2023</year>
          <pub-id pub-id-type="doi">10.48550/arXiv.2304.10428</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Le</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Generating representative samples for few-shot classification</article-title>
          <year>2022</year>
          <conf-name>2022 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)</conf-name>
          <conf-date>June 18-24, 2022</conf-date>
          <conf-loc>New Orleans, LA</conf-loc>
          <fpage>8993</fpage>
          <lpage>9003</lpage>
          <pub-id pub-id-type="doi">10.1109/cvpr52688.2022.00880</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Li</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>RT: a retrieving and chain-of-thought framework for few-shot medical named entity recognition</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2024</year>
          <volume>31</volume>
          <issue>9</issue>
          <fpage>1929</fpage>
          <lpage>1938</lpage>
          <pub-id pub-id-type="doi">10.1093/jamia/ocae095</pub-id>
          <pub-id pub-id-type="medline">38708849</pub-id>
          <pub-id pub-id-type="pii">7665312</pub-id>
          <pub-id pub-id-type="pmcid">PMC11339512</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gero</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Singh</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Cheng</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Naumann</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Galley</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Gao</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Poon</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Self-verification improves few-shot clinical information extraction</article-title>
          <source>ArXiv. Preprint posted online on May 30, 2023</source>
          <year>2023</year>
          <pub-id pub-id-type="doi">10.48550/arXiv.2306.00024</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wen</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Jia</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Medical named entity recognition from un-labelled medical records based on pre-trained language models and domain dictionary</article-title>
          <source>Data Intell</source>
          <year>2021</year>
          <volume>3</volume>
          <issue>3</issue>
          <fpage>417</fpage>
          <pub-id pub-id-type="doi">10.1162/dint_a_00105</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="web">
          <source>China National Knowledge Infrastructure</source>
          <access-date>2024-09-28</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.cnki.net/">https://www.cnki.net/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="web">
          <article-title>Open source data labeling</article-title>
          <source>Label Studio</source>
          <access-date>2024-05-01</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://labelstud.io/">https://labelstud.io/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Artstein</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Poesio</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Inter-coder agreement for computational linguistics</article-title>
          <source>Comput Ling</source>
          <year>2008</year>
          <volume>34</volume>
          <issue>4</issue>
          <fpage>555</fpage>
          <lpage>596</lpage>
          <pub-id pub-id-type="doi">10.1162/coli.07-034-r2</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wei</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Guo</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Detecting and mitigating the ungrounded hallucinations in text generation by LLMs</article-title>
          <year>2024</year>
          <conf-name>AISNS '23: Proceedings of the 2023 International Conference on Artificial Intelligence, Systems and Network Security</conf-name>
          <conf-date>December 22-24, 2023</conf-date>
          <conf-loc>Mianyang, China</conf-loc>
          <fpage>77</fpage>
          <lpage>81</lpage>
          <pub-id pub-id-type="doi">10.1145/3661638.3661653</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Shen</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Tan</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Xi</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Zhuang</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>PromptNER: prompt locating and typing for named entity recognition</article-title>
          <year>2023</year>
          <conf-name>Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</conf-name>
          <conf-date>July, 2023</conf-date>
          <conf-loc>Toronto, ON</conf-loc>
          <fpage>12492</fpage>
          <lpage>12507</lpage>
          <pub-id pub-id-type="doi">10.18653/v1/2023.acl-long.698</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>He</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Mao</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Gong</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Cambria</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Template-free prompting for few-shot named entity recognition via semantic-enhanced contrastive learning</article-title>
          <source>IEEE Trans Neural Networks Learn Syst</source>
          <year>2024</year>
          <volume>35</volume>
          <issue>12</issue>
          <fpage>18357</fpage>
          <lpage>18369</lpage>
          <pub-id pub-id-type="doi">10.1109/tnnls.2023.3314807</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tsujimura</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Yamada</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Ida</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Miwa</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Sasaki</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Contextualized medication event extraction with striding NER and multi-turn QA</article-title>
          <source>J Biomed Inform</source>
          <year>2023</year>
          <volume>144</volume>
          <fpage>104416</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(23)00137-5"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2023.104416</pub-id>
          <pub-id pub-id-type="medline">37321443</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(23)00137-5</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wan</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Deng</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Du</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>kNN-ICL: Compositional task-oriented parsing generalization with nearest neighbor in-context learning</article-title>
          <year>2024</year>
          <conf-name>Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers)</conf-name>
          <conf-date>June, 2024</conf-date>
          <conf-loc>Mexico City, Mexico</conf-loc>
          <fpage>326</fpage>
          <lpage>337</lpage>
          <pub-id pub-id-type="doi">10.18653/v1/2024.naacl-long.19</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gao</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Yao</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>SimCSE: simple contrastive learning of sentence embeddings</article-title>
          <source>ArXiv. Preprint posted online on April 18, 2021</source>
          <year>2021</year>
          <pub-id pub-id-type="doi">10.48550/arXiv.2104.08821</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="web">
          <source>Acge_text_embedding</source>
          <access-date>2024-05-20</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://github.com/intsig-textin/acge_text_embedding">https://github.com/intsig-textin/acge_text_embedding</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Douze</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Guzhva</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Deng</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Johnson</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Szilvasy</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Mazaré</surname>
              <given-names>PE</given-names>
            </name>
            <name name-style="western">
              <surname>Lomeli</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Hosseini</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Jégou</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>The Faiss library</article-title>
          <source>ArXiv. Preprint posted online on January 16, 2024</source>
          <year>2024</year>
          <pub-id pub-id-type="doi">10.48550/arXiv.2401.08281</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="web">
          <article-title>THUDM/glm-4-9b-chat</article-title>
          <source>Hugging Face</source>
          <access-date>2024-05-28</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://huggingface.co/THUDM/glm-4-9b-chat">https://huggingface.co/THUDM/glm-4-9b-chat</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Hui</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Zheng</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Dong</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Wei</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Tang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Tu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Ma</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Qwen2 technical report</article-title>
          <source>ArXiv. Preprint posted online on July 15, 2024</source>
          <year>2024</year>
          <pub-id pub-id-type="doi">10.48550/arXiv.2407.10671</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Bidirectional LSTM-CRF models for sequence tagging</article-title>
          <source>ArXiv. Preprint posted online on August 09, 2015</source>
          <year>2015</year>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://arxiv.org/abs/1508.01991"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref44">
        <label>44</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Li</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Fei</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Teng</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Ji</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Unified named entity recognition as word-word relation classification</article-title>
          <year>2022</year>
          <conf-name>Proceedings of the AAAI Conference on Artificial Intelligence</conf-name>
          <conf-date>February 22-March 1, 2022</conf-date>
          <conf-loc>Vancouver, BC</conf-loc>
          <fpage>10965</fpage>
          <lpage>10973</lpage>
          <pub-id pub-id-type="doi">10.1609/aaai.v36i10.21344</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref45">
        <label>45</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Qin</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Dagar</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Ye</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>In-context learning with iterative demonstration selection</article-title>
          <source>ArXiv. Preprint posted online on October 14, 2023</source>
          <year>2023</year>
          <pub-id pub-id-type="doi">10.48550/arXiv.2310.09881</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref46">
        <label>46</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hao</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Dong</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Han</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Gu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wei</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Structured prompting: scaling in-context learning to 1,000 examples</article-title>
          <source>ArXiv. Preprint posted online on December 13, 2022</source>
          <year>2022</year>
          <pub-id pub-id-type="doi">10.48550/arXiv.2212.06713</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref47">
        <label>47</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bhadauria</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Sierra-Múnera</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Krestel</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>The effects of data quality on named entity recognition</article-title>
          <year>2024</year>
          <conf-name>Proceedings of the Ninth Workshop on Noisy and User-generated Text (W-NUT 2024)</conf-name>
          <conf-date>November 15, 2024</conf-date>
          <conf-loc>San Ġiljan, Malta</conf-loc>
          <fpage>79</fpage>
          <lpage>88</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/2024.wnut-1.8/"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref48">
        <label>48</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>RS</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>CC</given-names>
            </name>
            <name name-style="western">
              <surname>Tsao</surname>
              <given-names>HY</given-names>
            </name>
          </person-group>
          <article-title>Empowering large language models to leverage domain-specific knowledge in E-learning</article-title>
          <source>Appl Sci</source>
          <year>2024</year>
          <volume>14</volume>
          <issue>12</issue>
          <fpage>5264</fpage>
          <pub-id pub-id-type="doi">10.3390/app14125264</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref49">
        <label>49</label>
        <nlm-citation citation-type="web">
          <article-title>Governing AI for humanity: interim report</article-title>
          <source>Advisory Body on Artificial Intelligence, UN</source>
          <access-date>2024-09-22</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://digitallibrary.un.org/record/4062495">https://digitallibrary.un.org/record/4062495</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref50">
        <label>50</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Peng</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Mao</surname>
              <given-names>Z</given-names>
            </name>
          </person-group>
          <article-title>Large language models for human–robot interaction: a review</article-title>
          <source>Biomimetic Intell Rob</source>
          <year>2023</year>
          <volume>3</volume>
          <issue>4</issue>
          <fpage>100131</fpage>
          <pub-id pub-id-type="doi">10.1016/j.birob.2023.100131</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref51">
        <label>51</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Terry</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Cai</surname>
              <given-names>CJ</given-names>
            </name>
          </person-group>
          <article-title>AI chains: transparent and controllable human-AI interaction by chaining large language model prompts</article-title>
          <year>2022</year>
          <conf-name>CHI '22: Proceedings of the 2022 CHI Conference on Human Factors in Computing Systems</conf-name>
          <conf-date>April 29, 2022</conf-date>
          <conf-loc>New Orleans, LA</conf-loc>
          <fpage>1</fpage>
          <lpage>22</lpage>
          <pub-id pub-id-type="doi">10.1145/3491102.3517582</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref52">
        <label>52</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Salmani</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Omidi</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Ren</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Rezagholizadeh</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Eshaghi</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Beyond the limits: a survey of techniques to extend the context length in large language models</article-title>
          <year>2024</year>
          <conf-name>Proceedings of the Thirty-Third International Joint Conference on Artificial Intelligence Survey Track</conf-name>
          <conf-date>August 3-9, 2024</conf-date>
          <conf-loc>Jeju, Korea</conf-loc>
          <pub-id pub-id-type="doi">10.24963/ijcai.2024/917</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref53">
        <label>53</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gao</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Xiong</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Gao</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Jia</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Pan</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Bi</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Dai</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Retrieval-augmented generation for large language models: a survey</article-title>
          <source>ArXiv. Preprint posted online on December 18, 2023</source>
          <year>2023</year>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://arxiv.org/abs/2312.10997"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref54">
        <label>54</label>
        <nlm-citation citation-type="web">
          <source>GitHub</source>
          <access-date>2025-03-11</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://github.com/Maizio/JMIR_NER.git">https://github.com/Maizio/JMIR_NER.git</ext-link>
          </comment>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
