<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMIR</journal-id>
      <journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id>
      <journal-title>Journal of Medical Internet Research</journal-title>
      <issn pub-type="epub">1438-8871</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v25i1e41100</article-id>
      <article-id pub-id-type="pmid">36884281</article-id>
      <article-id pub-id-type="doi">10.2196/41100</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Supervised Relation Extraction Between Suicide-Related Entities and Drugs: Development and Usability Study of an Annotated PubMed Corpus</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Eysenbach</surname>
            <given-names>Gunther</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Song</surname>
            <given-names>Yun Kyoung</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Song</surname>
            <given-names>Hyun-Je</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Pandey</surname>
            <given-names>Santosh</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Aslam</surname>
            <given-names>Muhammad Shahzad</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Karapetian</surname>
            <given-names>Karina</given-names>
          </name>
          <degrees>MS</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-5363-5171</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Jeon</surname>
            <given-names>Soo Min</given-names>
          </name>
          <degrees>PharmD, PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-5100-5739</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Kwon</surname>
            <given-names>Jin-Won</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-3467-7805</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Suh</surname>
            <given-names>Young-Kyoon</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>School of Computer Science and Engineering</institution>
            <institution>Kyungpook National University</institution>
            <addr-line>Rm. 520, IT-5</addr-line>
            <addr-line>80 Daehak-ro, Bukgu</addr-line>
            <addr-line>Daegu, 41566</addr-line>
            <country>Republic of Korea</country>
            <phone>82 53 950 6372</phone>
            <email>yksuh@knu.ac.kr</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-3124-2566</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>School of Computer Science and Engineering</institution>
        <institution>Kyungpook National University</institution>
        <addr-line>Daegu</addr-line>
        <country>Republic of Korea</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>College of Pharmacy</institution>
        <institution>Jeju National University</institution>
        <addr-line>Jeju</addr-line>
        <country>Republic of Korea</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>BK21 FOUR Community-Based Intelligent Novel Drug Discovery Education Unit, College of Pharmacy and Research Institute of Pharmaceutical Sciences</institution>
        <institution>Kyungpook National University</institution>
        <addr-line>Daegu</addr-line>
        <country>Republic of Korea</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Young-Kyoon Suh <email>yksuh@knu.ac.kr</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <year>2023</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>8</day>
        <month>3</month>
        <year>2023</year>
      </pub-date>
      <volume>25</volume>
      <elocation-id>e41100</elocation-id>
      <history>
        <date date-type="received">
          <day>16</day>
          <month>7</month>
          <year>2022</year>
        </date>
        <date date-type="rev-request">
          <day>27</day>
          <month>9</month>
          <year>2022</year>
        </date>
        <date date-type="rev-recd">
          <day>18</day>
          <month>11</month>
          <year>2022</year>
        </date>
        <date date-type="accepted">
          <day>19</day>
          <month>12</month>
          <year>2022</year>
        </date>
      </history>
      <copyright-statement>©Karina Karapetian, Soo Min Jeon, Jin-Won Kwon, Young-Kyoon Suh. Originally published in the Journal of Medical Internet Research (https://www.jmir.org), 08.03.2023.</copyright-statement>
      <copyright-year>2023</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research, is properly cited. The complete bibliographic information, a link to the original publication on https://www.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://www.jmir.org/2023/1/e41100" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Drug-induced suicide has been debated as a crucial issue in both clinical and public health research. Published research articles contain valuable data on the drugs associated with suicidal adverse events. An automated process that extracts such information and rapidly detects drugs related to suicide risk is essential but has not been well established. Moreover, few data sets are available for training and validating classification models on drug-induced suicide.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>This study aimed to build a corpus of drug-suicide relations containing annotated entities for drugs, suicidal adverse events, and their relations. To confirm the effectiveness of the drug-suicide relation corpus, we evaluated the performance of a relation classification model using the corpus in conjunction with various embeddings.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>We collected the abstracts and titles of research articles associated with drugs and suicide from PubMed and manually annotated them along with their relations at the sentence level (adverse drug events, treatment, suicide means, or miscellaneous). To reduce the manual annotation effort, we preliminarily selected sentences with a pretrained zero-shot classifier or sentences containing only drug and suicide keywords. We trained a relation classification model using various Bidirectional Encoder Representations from Transformer embeddings with the proposed corpus. We then compared the performances of the model with different Bidirectional Encoder Representations from Transformer–based embeddings and selected the most suitable embedding for our corpus.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>Our corpus comprised 11,894 sentences extracted from the titles and abstracts of the PubMed research articles. Each sentence was annotated with drug and suicide entities and the relationship between these 2 entities (adverse drug events, treatment, means, and miscellaneous). All of the tested relation classification models that were fine-tuned on the corpus accurately detected sentences of suicidal adverse events regardless of their pretrained type and data set properties.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>To our knowledge, this is the first and most extensive corpus of drug-suicide relations.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>suicide</kwd>
        <kwd>adverse drug events</kwd>
        <kwd>information extraction</kwd>
        <kwd>relation classification</kwd>
        <kwd>bidirectional encoder representations from transformers</kwd>
        <kwd>pharmacovigilance</kwd>
        <kwd>natural language processing</kwd>
        <kwd>PubMed</kwd>
        <kwd>corpus</kwd>
        <kwd>language model</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <sec>
        <title>Background</title>
        <p>Suicide is an intentional death that is caused by self-harm. Although global suicide rates have declined in recent years, suicide accounts for approximately 700,000 deaths (1.3% of all deaths) per annum [<xref ref-type="bibr" rid="ref1">1</xref>]. The Comprehensive Mental Health Action Plan (2013-2020) of the World Health Organization argues that suicide remains a critical global public health problem [<xref ref-type="bibr" rid="ref1">1</xref>].</p>
        <p>Although suicide can be triggered by multiple factors and their complex effects [<xref ref-type="bibr" rid="ref2">2</xref>], most cases are related to psychiatric disorders such as depression, psychosis, anxiety, and substance use [<xref ref-type="bibr" rid="ref3">3</xref>]. Physical disorders such as cancer, respiratory diseases, hypertension, and diabetes are also debated as risk factors for suicide [<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref5">5</xref>]. Effective treatment of individual patients can avoid and decrease the suicide risk associated with these factors; however, caution is required because the prescribed drug may itself be an independent risk factor for suicide.</p>
        <p>Several studies have suggested a link between suicidal behaviors (suicidal ideation, attempted suicide, and completed suicide) and adverse events associated with prescribed drugs [<xref ref-type="bibr" rid="ref6">6</xref>-<xref ref-type="bibr" rid="ref9">9</xref>]. For instance, a previous meta-analysis of clinical trials showed that selective serotonin reuptake inhibitors (SSRIs) tend to increase the risk of suicidality in patients with depression and all indications [<xref ref-type="bibr" rid="ref10">10</xref>]. Consequently, the United States Food and Drug Administration issued a black box warning for the suicidal risk of SSRIs. Qato et al [<xref ref-type="bibr" rid="ref11">11</xref>] investigated the use of drugs that pose a potential suicide risk in the United States. They reported 103 drugs associated with suicidality as an adverse event; furthermore, the use of these drugs substantially increased from 17.3% in 2005-2006 to 23.5% in 2013-2014 [<xref ref-type="bibr" rid="ref11">11</xref>].</p>
        <p>To prevent and reduce the occurrence of drug-induced suicide, we must improve our knowledge of the drugs that pose a potential suicide risk. Although clinical trials have evaluated the efficacy and safety of drugs in the premarketing phase, they usually have strict inclusion and exclusion criteria, short-term duration, and small sample size, which limit their ability to detect <italic>rare</italic> adverse drug events (ADEs) [<xref ref-type="bibr" rid="ref12">12</xref>-<xref ref-type="bibr" rid="ref14">14</xref>]. Therefore, ongoing evaluations of drugs introduced to the market, called postmarketing surveillance, are crucial for rare ADEs such as suicide.</p>
      </sec>
      <sec>
        <title>Theoretical Background</title>
        <p>Among various sources of information on ADEs in the postmarketing surveillance field, research articles are the most informative. However, extracting such information from these data sources is challenging because it is recorded in an unstructured free-text format.</p>
        <p>Automatic information extraction systems can be developed through natural language processing (NLP), a field of computer science and artificial intelligence. A system that automatically excerpts information from research articles can accelerate the task of identifying drugs with potential suicide risk.</p>
        <p>The most general purpose corpora for relation extraction tasks in the biomedical domain contain diverse entities and relations [<xref ref-type="bibr" rid="ref15">15</xref>-<xref ref-type="bibr" rid="ref17">17</xref>]. More narrowly focused data sets represent the interactions between diseases [<xref ref-type="bibr" rid="ref18">18</xref>], drugs [<xref ref-type="bibr" rid="ref19">19</xref>], chemical components and diseases [<xref ref-type="bibr" rid="ref20">20</xref>], and drug and ADEs [<xref ref-type="bibr" rid="ref21">21</xref>,<xref ref-type="bibr" rid="ref22">22</xref>]. However, these corpora contain insufficient data when developing an information extraction system for drug-related suicidal events. For instance, the MEDLINE ADE data set contains only 3 (0.04%) suicide-related entities among 6821 sentences. These sentences are presented in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> [<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref21">21</xref>-<xref ref-type="bibr" rid="ref23">23</xref>].</p>
        <p>Several studies have attempted to classify sequences as suicide-related or nonsuicide-related sentences [<xref ref-type="bibr" rid="ref24">24</xref>-<xref ref-type="bibr" rid="ref26">26</xref>]. Such fixed relation agents require information on the agents themselves in the data set because the model must learn the entities between which the relation should be classified. Furthermore, models developed using the data sources of social media may not be adjustable to data from research articles, mainly because scientific texts follow strict grammatical rules rather than social language [<xref ref-type="bibr" rid="ref27">27</xref>], which is characterized by a high rate of abbreviations, nonformal terminology, and metaphoric phrases [<xref ref-type="bibr" rid="ref28">28</xref>].</p>
      </sec>
      <sec>
        <title>Related Work</title>
        <p>As drug-induced suicide is a type of ADE, we reviewed the published data sets on ADEs. Most of these data sets contain information on drugs and conditions (eg, diseases, signs, and symptoms) and the relationship between these entities. Nikfarjam et al [<xref ref-type="bibr" rid="ref23">23</xref>] created the ADRMine data set from posts on Twitter and the health-related social network DailyStrength [<xref ref-type="bibr" rid="ref29">29</xref>]. They annotated signs and symptoms at the sentence level, including adverse drug reactions. Van Mulligen et al [<xref ref-type="bibr" rid="ref15">15</xref>] created the EU-ADR corpus from the titles and abstracts of MEDLINE articles. They annotated the drugs and diseases and the relationship between these entities. For instance, a <italic>drug-disease</italic> relation in their corpus indicates that the drug may produce an adverse effect at the sentence level but does not necessarily imply an ADE. Schulz et al [<xref ref-type="bibr" rid="ref16">16</xref>] developed another corpus based on case reports from PubMed. They annotated the cases, conditions, findings, factors, negation modifiers, and relationship between these entities. Gurulingappa et al [<xref ref-type="bibr" rid="ref21">21</xref>] developed a MEDLINE ADE corpus to support the automatic extraction of drug-related adverse events from case reports in MEDLINE (a subset of PubMed). Their corpus contains 4272 unique sentences and 6821 relations. Alvaro et al [<xref ref-type="bibr" rid="ref22">22</xref>] created a source-comparative corpus called TwiMed, which includes annotated drugs, symptoms, diseases, and negative drug-associated outcomes. <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref> provides a detailed comparison of these corpora.</p>
      </sec>
      <sec>
        <title>Problem</title>
        <p>Several studies have produced various corpora covering drugs and general diseases. However, as the existing corpora seldom focus on drug-induced suicide events, we cannot gain extensive knowledge of medicines that pose a potential risk of suicide. This knowledge gap limits our ability to prevent and reduce the occurrence of drug-related suicides. Moreover, few corpora include the directional relationship between drugs and suicide and vice versa. To address these concerns, we constructed a novel drug-suicide relation (DSR) corpus from a wide range of biomedical articles on PubMed.</p>
      </sec>
      <sec>
        <title>Objective</title>
        <p>The objective of our research was to construct a DSR corpus. The obtained corpus consisted of 11,894 sentences extracted from PubMed research articles. It included (1) annotations on 2 entities (drug and suicidal events) and (2) annotations on the relations between the entities. PubMed provides access to broad-spectrum articles in the biomedical field, covering &#62;70% of all publications [<xref ref-type="bibr" rid="ref30">30</xref>]. Therefore, our corpus may be useful for developing information extraction models for diverse biomedical databases. To validate our corpus, we evaluated the relation classification performances of Bidirectional Encoder Representations from Transformer (BERT) models fine-tuned on data sets with diverse properties extracted from our corpus.</p>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Overview</title>
        <p>This study was conducted in two phases: (1) construction of the DSR corpus and (2) validation of the DSR corpus. To implement the first phase, we developed a sophisticated workflow comprising four steps: (1) data collection, (2) preprocessing stage, (3) data annotation, and (4) postprocessing stage. First, we gathered data from DrugBank and PubMed and preprocessed them for further annotation. Second, we manually annotated the entity pairs and relation classes for each sentence. Third, we created the corpus from the raw annotations via postprocessing of the labeled data. We then built various data sets from the corpus with different parameters for the subsequent phase. In the second phase of our study, we evaluated the performance of the BERT-based relation classification model using several language models (LMs) fine-tuned on various data sets compiled from our developed corpus. Both phases were implemented using Python 3.7. <xref rid="figure1" ref-type="fig">Figure 1</xref> shows the overall workflow for constructing and testing the DSR corpus.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Workflow of constructing and testing the DSR (drug-suicide relation) corpus. BERT: Bidirectional Encoder Representations from Transformer; NER: named entity recognition; NLTK: natural language toolkit.</p>
          </caption>
          <graphic xlink:href="jmir_v25i1e41100_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Generation of the DSR Corpus</title>
        <sec>
          <title>Data Collection and Preprocessing</title>
          <p>We collected the titles and abstracts of all available articles in English on the association between drugs and suicide published by October 13, 2021. <xref ref-type="boxed-text" rid="box1">Textbox 1</xref> presents the search queries used in this study.</p>
          <p>PubMed contains metadata at the level of a paper, which are useful for data filtering in the collection stage. When building the search query, we used the Medical Subject Headings (MeSH) terms [<xref ref-type="bibr" rid="ref31">31</xref>] “suicidal ideation,” “suicide, attempted,” “suicide, completed,” and “suicide,” along with text words associated with the keyword “suicide” in PubMed. We considered generic drug names from DrugBank version 5.1.8 [<xref ref-type="bibr" rid="ref32">32</xref>,<xref ref-type="bibr" rid="ref33">33</xref>] and their synonyms for drugs. We excluded drugs categorized as vitamins, mineral supplements, tonics, blood substitutes, emollients and protectives, antiseptics and disinfectants, or medicated dressings according to the Anatomical Therapeutic Chemical Classification System [<xref ref-type="bibr" rid="ref34">34</xref>] and various sections of the classification. We used the <italic>PyMed</italic> package (version 0.8.9) [<xref ref-type="bibr" rid="ref35">35</xref>] for PubMed to automate the task of collecting the titles and abstracts of articles associated with each drug.</p>
          <p>The collected titles and abstracts were tokenized at the sentence level using a pretrained tokenizer in the <italic>NLTK</italic> package (version 3.6.1; [<xref ref-type="bibr" rid="ref36">36</xref>]). Among the sentences obtained (N=172,249) from 17,017 articles on PubMed, we collected only those sentences containing information on drugs and suicide. The DSR corpus was then developed at the sentence level as follows: first, sentences containing at least one mention of a drug were selected. Second, we chose suicide-related sentences that (1) contained the suicidal keyword “suicid” (a stemmed version of the word “suicide”) or (2) are classified as “suicidal” by a model. Yin et al [<xref ref-type="bibr" rid="ref37">37</xref>] proposed a method using models pretrained on natural language understanding data sets as zero-shot sequence classifiers. To check whether the suicide-related sentences are classified as “suicidal,” we used a Bidirectional and Auto-Regressive Transformers (BART) large model [<xref ref-type="bibr" rid="ref38">38</xref>] pretrained on the Multi-Genre Natural Language Inference corpus [<xref ref-type="bibr" rid="ref39">39</xref>] with the custom binary classification of “suicide” and “non-suicide.” If the model infers that a given sentence is “suicide” with a probability of ≥.5, it assigns a suicidal label to that sentence. Finally, we obtained 9732 data entries for annotation.</p>
          <boxed-text id="box1" position="float">
            <title>PubMed query template for retrieving drug-mentioning suicide-related articles.</title>
            <p>(%DRUG% [Supplementary concept] OR %DRUG%[MeSH Terms] OR %DRUG%[TW]) AND (“suicidal ideation”[MeSH Terms] OR “suicide, attempted”[MeSH Terms] OR “suicide, completed”[MeSH Terms] OR “suicide”[MeSH Terms] OR suicid[TW] OR suicidals[TW] OR suicidality[TW] OR suicide[TW] OR suicidal[TW] OR suiciders[TW] OR suicidally[TW] OR suicides[TW] OR suicide s[TW] OR suicided[TW]) AND</p>
            <p>(English[Language])</p>
          </boxed-text>
        </sec>
        <sec>
          <title>Data Annotation</title>
          <p>During the data annotation stage, our workflow assigned three labels to each sentence: (1) drug entity, (2) suicide entity, and (3) relation class. Two annotators with pharmacological backgrounds participated independently in the annotation process. First, 2 annotators reviewed the automatically annotated [<xref ref-type="bibr" rid="ref40">40</xref>] labels of drug entities. The annotators assigned each drug’s generic name, brand name, class name, and abbreviated name as a drug entity. The metabolite and salt forms of the drug were excluded. Second, they manually annotated the suicide entities in each sentence. The suicidal entities were defined as mentions of suicide-related events, tendencies, and behaviors, including suicide risk, suicidal attempt, completed suicide, and suicidal ideation, or suicide-related behavior disorders. Third, they classified the relation class for each sentence as an “<italic>adverse drug event</italic> (ADE),” “<italic>suicide means,</italic>” “<italic>treatment,</italic>” “<italic>miscellaneous</italic>” (such as comparative sentences, research objectives, miscellaneous sentences, and no explicit relation), or “<italic>none.</italic>”</p>
          <p>The primary relations between a drug and a suicidal entity were set as follows:</p>
          <list list-type="order">
            <list-item>
              <p>ADEs: This relation indicates that suicidal events, including suicide attempts, suicide completions, and self-harm–related behaviors, followed the drug administration.</p>
            </list-item>
            <list-item>
              <p>Suicide means: This relation indicates that the drug was deliberately used (ie, taken in overdose) to commit suicide.</p>
            </list-item>
            <list-item>
              <p>Treatment: This relation indicates that the drug was used to treat the signs or symptoms of suicidal ideation and suicidal behavior disorder.</p>
            </list-item>
          </list>
          <p>When multiple entities for drugs or suicide appeared in a single sentence, we represented all “sentence–drug entity–suicidal entity” cases by duplicating the sentence. The “relation-class” label was excluded from the identifying representation because the relations between the same entities cannot overlap. The annotation guidelines are detailed in <xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref> [<xref ref-type="bibr" rid="ref40">40</xref>]. <xref rid="figure2" ref-type="fig">Figure 2</xref> [<xref ref-type="bibr" rid="ref41">41</xref>] shows some relation-class entries. Each data entry includes a sentence, drug entity, suicide-related entity, and the relation class between the 2 entities.</p>
          <fig id="figure2" position="float">
            <label>Figure 2</label>
            <caption>
              <p>Examples of relation class entries: the sentences of each class in the Doccano environment are annotated. ADE: adverse drug event; CDI: Children's Depression Inventory; SE: suicidal entity.</p>
            </caption>
            <graphic xlink:href="jmir_v25i1e41100_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
        </sec>
        <sec>
          <title>Interannotator Agreement</title>
          <p>Two annotators with pharmacological backgrounds independently annotated the drug and suicide entities and their relations in each sentence. The annotators then compared their annotations and matched the annotations for drug and suicide entities according to the annotation guidelines (<xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref>). When a disagreement was observed, the annotations were matched by 2 independent reviewers (one pharmacist and the other with an NLP background). To validate the annotated relations between entities, we measured interannotator agreement using the Cohen κ method [<xref ref-type="bibr" rid="ref42">42</xref>]. We aligned the proposed relation classes of the 2 annotators between the same pair of entities in the same sentence. The interannotator agreement score was calculated as a pairwise Cohen κ score. The data were annotated with a Cohen κ score of 0.64, implying a substantial level of agreement [<xref ref-type="bibr" rid="ref43">43</xref>,<xref ref-type="bibr" rid="ref44">44</xref>].</p>
        </sec>
        <sec>
          <title>Annotation Postprocessing Process</title>
          <p>In the postprocessing stage, we revised the annotations of labels completed by the annotators and adjusted the data format to be used for relation classification with the BERT models. Before this step, the data were sorted in ascending order of occurrence number of each sentence in the data set. This sorting procedure reduced the probability of choosing duplicates when constructing the data set (selecting specific classes and implementing the downsampling process). Meanwhile, we eliminated examples with (1) ambiguous annotations not related to suicide (such as “suicide gene” and “suicidal patients”) for the suicidal entity, (2) at least one missing value of assigned labels, (3) sentence lengths &#62;512 characters (the maximum allowed by the vanilla BERT model) [<xref ref-type="bibr" rid="ref45">45</xref>], (4) no mentions of the annotated entities in the sentence, or (5) overlapped entities. Excluding the examples with sentence lengths &#62;512 characters was deemed acceptable as most (but not all) of the recent relation classification models [<xref ref-type="bibr" rid="ref46">46</xref>-<xref ref-type="bibr" rid="ref52">52</xref>] use BERT-based or RoBERTa-based approaches [<xref ref-type="bibr" rid="ref53">53</xref>]. Although the BERT architecture of RoBERTa [<xref ref-type="bibr" rid="ref53">53</xref>] has been optimized for faster learning, the maximum sequence length remains at 512 characters. Furthermore, such long sentences were few in our corpus; therefore, their impact was almost negligible. We then distributed the data records with multiple appearances of the same entity in a sentence and calculated the exact positions of the entities in the sentence. Finally, we obtained the final corpus with a size of 11,894.</p>
        </sec>
      </sec>
      <sec>
        <title>Validation of the DSR Corpus: Fine-tuning R-BERT Models for Relation Classification</title>
        <sec>
          <title>Data Set Construction</title>
          <p>For the relation classification experiments, we constructed several data sets based on our DSR corpus, removing duplicated sentences to avoid the overfitting risk. As our DSR corpus is imbalanced, we applied random downsampling to control the distribution between the relation classifications. In previous studies, this approach achieved the highest performance at all levels of imbalance [<xref ref-type="bibr" rid="ref54">54</xref>]. In addition, because differences in entity order can affect the performance of the relation classification model [<xref ref-type="bibr" rid="ref55">55</xref>,<xref ref-type="bibr" rid="ref56">56</xref>], we designated the order of drug and suicide entities in the relation class. For example, if the drug entity (e1) preceded the suicidal entity (e2) in a sentence, the sentence was designated as “e1-e2”; otherwise, it was designated as “e2-e1.”</p>
          <p>The performance of the relation classification model is also affected by the properties of the data set. Therefore, we constructed various data sets with different properties from our DSR corpus and compared the model performances on each data set.</p>
          <p><xref ref-type="table" rid="table1">Table 1</xref> lists the properties of the data sets used in this study. The data set properties are the <italic>split ratio for training and test data sets, categorization of relation classifications</italic>, and <italic>order of entity mentions (within a sentence)</italic>.</p>
          <table-wrap position="float" id="table1">
            <label>Table 1</label>
            <caption>
              <p>Eight data sets constructed from our drug-suicide relation (DSR) corpus and their respective properties.</p>
            </caption>
            <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
              <col width="120"/>
              <col width="380"/>
              <col width="360"/>
              <col width="140"/>
              <thead>
                <tr valign="top">
                  <td>Data set</td>
                  <td>Split ratio for training and test data sets (training:test)</td>
                  <td>Categorization of relation classifications</td>
                  <td>Order of entities</td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td>1</td>
                  <td>90%:10%</td>
                  <td>None and ADE<sup>a</sup></td>
                  <td>No</td>
                </tr>
                <tr valign="top">
                  <td>2</td>
                  <td>80%:20%</td>
                  <td>None and ADE</td>
                  <td>Yes</td>
                </tr>
                <tr valign="top">
                  <td>3</td>
                  <td>80%:20%</td>
                  <td>None and ADE</td>
                  <td>No</td>
                </tr>
                <tr valign="top">
                  <td>4</td>
                  <td>90%:10%</td>
                  <td>None and ADE</td>
                  <td>Yes</td>
                </tr>
                <tr valign="top">
                  <td>5</td>
                  <td>90%:10%</td>
                  <td>None, ADE, suicide means, and treatment</td>
                  <td>Yes</td>
                </tr>
                <tr valign="top">
                  <td>6</td>
                  <td>90%:10%</td>
                  <td>None, ADE, suicide means, and treatment</td>
                  <td>No</td>
                </tr>
                <tr valign="top">
                  <td>7</td>
                  <td>80%:20%</td>
                  <td>None, ADE, suicide means, and treatment</td>
                  <td>Yes</td>
                </tr>
                <tr valign="top">
                  <td>8</td>
                  <td>80%:20%</td>
                  <td>None, ADE, suicide means, and treatment</td>
                  <td>No</td>
                </tr>
              </tbody>
            </table>
            <table-wrap-foot>
              <fn id="table1fn1">
                <p><sup>a</sup>ADE: adverse drug event.</p>
              </fn>
            </table-wrap-foot>
          </table-wrap>
        </sec>
        <sec>
          <title>R-BERT Model and Evaluation Metrics of Relation Classification</title>
          <p>A suicide-drug relation class in a sentence containing an entity pair was predicted using the relation classification model R-BERT [<xref ref-type="bibr" rid="ref46">46</xref>]. The R-BERT model enriches the pretrained BERT [<xref ref-type="bibr" rid="ref45">45</xref>] model with entity information for relation classification by placing a special token at the beginning and end of each entity. In this study, vanilla BERT [<xref ref-type="bibr" rid="ref45">45</xref>], BioBERT [<xref ref-type="bibr" rid="ref57">57</xref>], PubMedBERT [<xref ref-type="bibr" rid="ref58">58</xref>], ClinicalBERT [<xref ref-type="bibr" rid="ref59">59</xref>], and SciBERT [<xref ref-type="bibr" rid="ref60">60</xref>] LMs were used as the embedding layers of R-BERT. We fine-tuned the resulting R-BERT variations in 10 epochs and increased the maximum sentence length to 512, which is a limitation of the BERT model [<xref ref-type="bibr" rid="ref45">45</xref>]. A 10-fold cross-validation of all data sets was performed using the <italic>Stratified Shuffle Split</italic> method provided in the <italic>sklearn</italic> library (version 1.0.2; [<xref ref-type="bibr" rid="ref61">61</xref>]).</p>
          <p>The performances of the relation classification models on ADE classes were evaluated in terms of the <italic>F</italic><sub>1</sub>-score, defined as the harmonic mean of precision (ratio of correctly predicted positive observations to all predicted positive observations) and recall (ratio of correctly predicted positive observations to all observations in the actual class). The <italic>F</italic><sub>1</sub>-score is considered the gold standard for relation extraction, relation classification, and other NLP tasks. In the present evaluation, the true class was the ADE class and the false class was the non-ADE class.</p>
        </sec>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <p>On the basis of the titles and abstracts of 17,017 articles collected from PubMed, we created a corpus of 11,894 sentences with drug-suicide entity pairs and their relation classes.</p>
      <p><xref ref-type="table" rid="table2">Table 2</xref> presents the frequencies of sentences in each relation class of the DSR corpus. The most frequent relation classes are “miscellaneous” (4250/11,894, 35.73%) and “none” (3761/11,894, 31.62%). Among the remaining classes, the most common is “suicide means” (1726/11,894, 14.51%), followed by “treatment” (1281/11,894, 10.77%) and “ADE” (876/11,894, 7.37%). In the sentences of the “none,” “ADE,” and “treatment” classes, the “e1-e2” order appears more frequently than the “e2-e1” order. In the sentences classified as “suicide means” and “miscellaneous,” the order was similarly distributed between “e1-e2” and “e2-e1.”</p>
      <p><xref ref-type="table" rid="table3">Table 3</xref> presents the top 10 most frequently mentioned drugs and their respective relation classes in the sentences of our DSR corpus (listed are the numbers of drug names, not the numbers of sentences). The most frequently mentioned ADE drug was isotretinoin (34/717, 4.7%), followed by varenicline (33/717, 4.6%), fluoxetine (30/717, 4.2%), and paroxetine (29/717, 4%). In the “suicide means” category, the most commonly mentioned drug is insulin (63/1549, 4.07%). In the “treatment” relation class, the most commonly mentioned drugs are lithium (331/1042, 31.77%) and ketamine (261/1042, 25.05%). Several of the top 10 “treatment” drugs (lithium, clozapine, fluoxetine, and paroxetine) were also among the top 10 drugs in “ADE.” Next, we explored the embedding LM that best improved the relation classification performance of the R-BERT model fine-tuned with our corpus.</p>
      <p><xref ref-type="table" rid="table4">Table 4</xref> shows the performances of various R-BERT models with different embedded LMs after refinement on distinct data sets (<xref ref-type="table" rid="table1">Table 1</xref> describes the properties of the data sets derived from our corpus). The <italic>F</italic><sub>1</sub>-score of the R-BERT models ranged from 0.8781 to 0.9583. Overall, BioBERT predicted the ADE class better than the other embedding models, with an average <italic>F</italic><sub>1</sub>-score of 0.9362. BioBERT also achieved the highest <italic>F</italic><sub>1</sub>-score across 6 of the 8 data sets (the exceptions were data sets 5 and 8). Even in the exception cases, BioBERT achieved near-optimal performance. BioBERT was closely lagged by PubMedBERT (average <italic>F</italic><sub>1</sub>-score=0.9238), which did not perform optimally across all the individual experiments.</p>
      <p>Among the different data sets, data set 1 achieved the highest average <italic>F</italic><sub>1</sub>-score. Data set 1 ignores the entity order and uses a 90% split ratio and a binary class (0.9498; see the <italic>Average</italic> column in <xref ref-type="table" rid="table4">Table 4</xref>). Meanwhile, 4 out of the 5 LMs achieved their highest <italic>F</italic><sub>1</sub>-score when fine-tuned on data set 1 (BioBERT, 0.9583; PubMedBERT, 0.9503; ClinicalBERT, 0.9519; and SciBERT, 0.9496).</p>
      <table-wrap position="float" id="table2">
        <label>Table 2</label>
        <caption>
          <p>Frequency of sentences (N=11,894) in each relation class in our drug-suicide relation (DSR) corpus (apart from “miscellaneous” and “no relation,” “suicide means” is the most common relation class).</p>
        </caption>
        <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
          <col width="30"/>
          <col width="600"/>
          <col width="0"/>
          <col width="190"/>
          <col width="0"/>
          <col width="180"/>
          <thead>
            <tr valign="top">
              <td colspan="3">Class and ordered entity pair of drug and suicidal entity</td>
              <td colspan="2">ID</td>
              <td>Value, n</td>
            </tr>
          </thead>
          <tbody>
            <tr valign="top">
              <td colspan="3">
                <bold>No relation (n=3761, 31.62%)</bold>
              </td>
              <td colspan="2">0</td>
              <td>
                <break/>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>No relation (e1-e2)</td>
              <td colspan="2">
                <break/>
              </td>
              <td colspan="2">2226</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>No relation (e2-e1)</td>
              <td colspan="2">
                <break/>
              </td>
              <td colspan="2">1535</td>
            </tr>
            <tr valign="top">
              <td colspan="3">
                <bold>ADE<sup>a</sup> (n=876, 7.37%)</bold>
              </td>
              <td colspan="2">1</td>
              <td>
                <break/>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>DRUG-ADE (e1-e2)</td>
              <td colspan="2">
                <break/>
              </td>
              <td colspan="2">512</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>DRUG-ADE (e2-e1)</td>
              <td colspan="2">
                <break/>
              </td>
              <td colspan="2">364</td>
            </tr>
            <tr valign="top">
              <td colspan="3">
                <bold>Means (n=1726, 14.51%)</bold>
              </td>
              <td colspan="2">2</td>
              <td>
                <break/>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Means-event (e1-e2)</td>
              <td colspan="2">
                <break/>
              </td>
              <td colspan="2">844</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Means-event (e2-e1)</td>
              <td colspan="2">
                <break/>
              </td>
              <td colspan="2">882</td>
            </tr>
            <tr valign="top">
              <td colspan="3">
                <bold>Treatment (n=1281, 10.77%)</bold>
              </td>
              <td colspan="2">3</td>
              <td>
                <break/>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Treatment-event (e1-e2)</td>
              <td colspan="2">
                <break/>
              </td>
              <td colspan="2">890</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Treatment-event (e2-e1)</td>
              <td colspan="2">
                <break/>
              </td>
              <td colspan="2">391</td>
            </tr>
            <tr valign="top">
              <td colspan="3">
                <bold>Miscellaneous (n=4250, 35.73%)</bold>
              </td>
              <td colspan="2">9</td>
              <td>
                <break/>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Miscellaneous (e1-e2)</td>
              <td colspan="2">
                <break/>
              </td>
              <td colspan="2">2141</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Miscellaneous (e2-e1)</td>
              <td colspan="2">
                <break/>
              </td>
              <td colspan="2">2109</td>
            </tr>
          </tbody>
        </table>
        <table-wrap-foot>
          <fn id="table2fn1">
            <p><sup>a</sup>ADE: adverse drug event.</p>
          </fn>
        </table-wrap-foot>
      </table-wrap>
      <table-wrap position="float" id="table3">
        <label>Table 3</label>
        <caption>
          <p>Top 10 drugs in each relation class of our drug-suicide relation (DSR) corpus (m: # of sentences mentioning an associated drug name).</p>
        </caption>
        <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
          <col width="110"/>
          <col width="110"/>
          <col width="110"/>
          <col width="110"/>
          <col width="110"/>
          <col width="110"/>
          <col width="100"/>
          <col width="140"/>
          <col width="100"/>
          <thead>
            <tr valign="bottom">
              <td>Rank</td>
              <td colspan="2">Total (<italic>m</italic>=3308)</td>
              <td colspan="2">ADE<sup>a</sup> (<italic>m</italic>=717)</td>
              <td colspan="2">Means (<italic>m</italic>=1549)</td>
              <td colspan="2">Treatment (<italic>m</italic>=1042)</td>
            </tr>
            <tr valign="top">
              <td/>
              <td>Drug</td>
              <td>Count</td>
              <td>Drug</td>
              <td>Count</td>
              <td>Drug</td>
              <td>Count</td>
              <td>Drug</td>
              <td>Count</td>
            </tr>
          </thead>
          <tbody>
            <tr valign="top">
              <td>1</td>
              <td>Lithium</td>
              <td>354</td>
              <td>Isotretinoin</td>
              <td>34</td>
              <td>Insulin</td>
              <td>63</td>
              <td>Lithium</td>
              <td>331</td>
            </tr>
            <tr valign="top">
              <td>2</td>
              <td>Ketamine</td>
              <td>264</td>
              <td>Varenicline</td>
              <td>33</td>
              <td>Paracetamol</td>
              <td>56</td>
              <td>Ketamine</td>
              <td>261</td>
            </tr>
            <tr valign="top">
              <td>3</td>
              <td>Clozapine</td>
              <td>168</td>
              <td>Fluoxetine</td>
              <td>30</td>
              <td>Barbiturates</td>
              <td>38</td>
              <td>Clozapine</td>
              <td>141</td>
            </tr>
            <tr valign="top">
              <td>4</td>
              <td>Insulin</td>
              <td>67</td>
              <td>Paroxetine</td>
              <td>29</td>
              <td>Metformin</td>
              <td>38</td>
              <td>Fluoxetine</td>
              <td>22</td>
            </tr>
            <tr valign="top">
              <td>5</td>
              <td>Fluoxetine</td>
              <td>58</td>
              <td>Cocaine</td>
              <td>25</td>
              <td>Caffeine</td>
              <td>25</td>
              <td>Buprenorphine</td>
              <td>21</td>
            </tr>
            <tr valign="top">
              <td>6</td>
              <td>Paracetamol</td>
              <td>57</td>
              <td>Zolpidem</td>
              <td>22</td>
              <td>Colchicine</td>
              <td>25</td>
              <td>Esketamine</td>
              <td>16</td>
            </tr>
            <tr valign="top">
              <td>7</td>
              <td>Barbiturates</td>
              <td>43</td>
              <td>Rimonabant</td>
              <td>17</td>
              <td>Amitriptyline</td>
              <td>19</td>
              <td>Paroxetine</td>
              <td>9</td>
            </tr>
            <tr valign="top">
              <td>8</td>
              <td>Metformin</td>
              <td>40</td>
              <td>Venlafaxine</td>
              <td>15</td>
              <td>Analgesics</td>
              <td>19</td>
              <td>Olanzapine</td>
              <td>9</td>
            </tr>
            <tr valign="top">
              <td>9</td>
              <td>Cocaine</td>
              <td>39</td>
              <td>Lithium</td>
              <td>14</td>
              <td>Diazepam</td>
              <td>19</td>
              <td>Milnacipran</td>
              <td>7</td>
            </tr>
            <tr valign="top">
              <td>10</td>
              <td>Paroxetine</td>
              <td>39</td>
              <td>Clozapine</td>
              <td>13</td>
              <td>Nicotine</td>
              <td>16</td>
              <td>Antidepressants</td>
              <td>6</td>
            </tr>
          </tbody>
        </table>
        <table-wrap-foot>
          <fn id="table3fn1">
            <p><sup>a</sup>ADE: adverse drug event.</p>
          </fn>
        </table-wrap-foot>
      </table-wrap>
      <table-wrap position="float" id="table4">
        <label>Table 4</label>
        <caption>
          <p>Performance comparison of various R-BERT (Bidirectional Encoder Representations from Transformers) models built by (1) applying different language models (LMs) as embedding layers and (2) fine-tuning on different data sets.</p>
        </caption>
        <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
          <col width="90"/>
          <col width="140"/>
          <col width="140"/>
          <col width="200"/>
          <col width="190"/>
          <col width="130"/>
          <col width="110"/>
          <thead>
            <tr valign="top">
              <td>Data set</td>
              <td>Vanilla BERT [<xref ref-type="bibr" rid="ref45">45</xref>]</td>
              <td>BioBERT [<xref ref-type="bibr" rid="ref57">57</xref>]</td>
              <td>PubMedBERT [<xref ref-type="bibr" rid="ref58">58</xref>]</td>
              <td>ClinicalBERT [<xref ref-type="bibr" rid="ref59">59</xref>]</td>
              <td>SciBERT [<xref ref-type="bibr" rid="ref60">60</xref>]</td>
              <td>Average</td>
            </tr>
          </thead>
          <tbody>
            <tr valign="top">
              <td>1</td>
              <td>0.9389</td>
              <td>0.9583</td>
              <td>0.9503</td>
              <td>0.9519</td>
              <td>0.9496</td>
              <td>0.9498</td>
            </tr>
            <tr valign="top">
              <td>2</td>
              <td>0.9435</td>
              <td>0.9528</td>
              <td>0.9487</td>
              <td>0.9489</td>
              <td>0.9459</td>
              <td>0.9480</td>
            </tr>
            <tr valign="top">
              <td>3</td>
              <td>0.9360</td>
              <td>0.9522</td>
              <td>0.9447</td>
              <td>0.9432</td>
              <td>0.9497</td>
              <td>0.9451</td>
            </tr>
            <tr valign="top">
              <td>4</td>
              <td>0.9406</td>
              <td>0.9486</td>
              <td>0.9372</td>
              <td>0.9448</td>
              <td>0.9468</td>
              <td>0.9436</td>
            </tr>
            <tr valign="top">
              <td>5</td>
              <td>0.9093</td>
              <td>0.9223</td>
              <td>0.9039</td>
              <td>0.9262</td>
              <td>0.9179</td>
              <td>0.9159</td>
            </tr>
            <tr valign="top">
              <td>6</td>
              <td>0.8847</td>
              <td>0.9268</td>
              <td>0.9143</td>
              <td>0.9102</td>
              <td>0.9113</td>
              <td>0.9095</td>
            </tr>
            <tr valign="top">
              <td>7</td>
              <td>0.8965</td>
              <td>0.9194</td>
              <td>0.9059</td>
              <td>0.9132</td>
              <td>0.8961</td>
              <td>0.9062</td>
            </tr>
            <tr valign="top">
              <td>8</td>
              <td>0.8781</td>
              <td>0.9089</td>
              <td>0.8856</td>
              <td>0.9162</td>
              <td>0.9100</td>
              <td>0.8998</td>
            </tr>
            <tr valign="top">
              <td>Average</td>
              <td>0.9159</td>
              <td>0.9362</td>
              <td>0.9238</td>
              <td>0.9318</td>
              <td>0.9284</td>
              <td>N/A<sup>a</sup></td>
            </tr>
          </tbody>
        </table>
        <table-wrap-foot>
          <fn id="table4fn1">
            <p><sup>a</sup>N/A: not applicable.</p>
          </fn>
        </table-wrap-foot>
      </table-wrap>
      <p><xref ref-type="table" rid="table5">Table 5</xref> presents the performance results of the R-BERT models in terms of the different properties of the 8 data sets. The average <italic>F</italic><sub>1</sub>-score for each property was determined from all the individual experimental results. When the training:testing split ratio of the data set was 90%:10%, the average <italic>F</italic><sub>1</sub>-score was 0.9297, which was only 0.49% higher than that of the 80%:20% split ratio (<italic>F</italic><sub>1</sub>=0.9248). This performance difference is minor. On average, the models performed 3.88% better in the binary class (<italic>F</italic><sub>1</sub>=0.9466) than in the quaternary class (<italic>F</italic><sub>1</sub>=0.9078). This result indicates a need to improve the performance of <italic>n</italic>-ary classification when <italic>n</italic> is &#62;2. Finally, learning the order of the entities (0.9284) improved the performance by 0.24% compared with ignoring the ordering (0.9260), which is consistent with earlier findings [<xref ref-type="bibr" rid="ref55">55</xref>,<xref ref-type="bibr" rid="ref56">56</xref>]. The same tendencies frequently appeared in the precision and recall results (<xref ref-type="supplementary-material" rid="app4">Multimedia Appendix 4</xref>).</p>
      <table-wrap position="float" id="table5">
        <label>Table 5</label>
        <caption>
          <p>Average performances of the R-BERT (Bidirectional Encoder Representations from Transformers) models on data sets with different properties (the binary relation data set yields the best <italic>F</italic><sub>1</sub>-score).</p>
        </caption>
        <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
          <col width="30"/>
          <col width="500"/>
          <col width="470"/>
          <thead>
            <tr valign="top">
              <td colspan="2">Data set properties and category</td>
              <td><italic>F</italic><sub>1</sub>-score, mean (SD)</td>
            </tr>
          </thead>
          <tbody>
            <tr valign="top">
              <td colspan="3">
                <bold>Split ratio for training and test data sets</bold>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>90%:10%</td>
              <td>0.9297 (0.0078)</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>80%:20%</td>
              <td>0.9248 (0.0078)</td>
            </tr>
            <tr valign="top">
              <td colspan="3">
                <bold>Relation set</bold>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Binary relation set</td>
              <td>0.9466 (0.0048)</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Quaternary relation set</td>
              <td>0.9078 (0.0110)</td>
            </tr>
            <tr valign="top">
              <td colspan="3">
                <bold>Ordered entity pair of drug and suicidal entities</bold>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Yes</td>
              <td>0.9284 (0.0058)</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>No</td>
              <td>0.9260 (0.0103)</td>
            </tr>
          </tbody>
        </table>
      </table-wrap>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>To our knowledge, this is the first and largest data set of DSRs. The existing data sets include information on ADEs but do not focus on drug-suicide ADEs; thus, they deliver insufficient data on drug-suicide associations. Among the 6821 sentences on drug-related adverse events in the MEDLINE corpus, only 3 (0.04%) contained an entity related to suicide. In contrast, our corpus contained a large number (876) of entities uniquely relating suicide as an ADE.</p>
        <p>A valuable data set must contain sufficient data. When collecting the titles and abstracts containing information on DSRs, we applied a detailed search query using both MeSH and text words. The MeSH term was particularly useful when searching for a wide range of articles in PubMed. Previous studies used only MeSH terms when searching PubMed for corpora. However, the indexing time of MeSH is likely to miss the latest relevant articles [<xref ref-type="bibr" rid="ref62">62</xref>]. DeMars and Perrusso [<xref ref-type="bibr" rid="ref63">63</xref>] compared the precision and recall of each strategy after searching for relevant articles using MeSH and text words in PubMed. They recommended combining MeSH and text words to obtain the most comprehensive number of papers.</p>
        <p>Manual annotation is time-consuming, costly, and laborious. Although MeSH and text words garnered the titles and abstracts from articles mentioning drugs and suicidal behaviors, they could not guarantee that every sentence was suicide related. To address this problem, we filtered the sentences classified as suicide relevant using a pretrained zero-shot classifier. In other words, we checked whether the classifier assessed the given sentences as suicide related and contained suicidal keywords. Consequently, only 6.9% (11,894/172,249) of the sentences collected from PubMed included relevant information for the DSR corpus. This new approach effectively reduced the amount of data that had to be annotated and provided a new strategy for preannotations. To reduce the annotation effort, previous studies randomly sampled the initial documents [<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref18">18</xref>,<xref ref-type="bibr" rid="ref20">20</xref>-<xref ref-type="bibr" rid="ref22">22</xref>], restricted the publication date of the documents [<xref ref-type="bibr" rid="ref18">18</xref>,<xref ref-type="bibr" rid="ref22">22</xref>], or filtered the initial documents based on some required properties [<xref ref-type="bibr" rid="ref18">18</xref>]. These techniques risk decreasing the quantity of fundamental data that can be collected and annotated.</p>
        <p>Some of the top 10 drugs associated with ADEs (fluoxetine, paroxetine, venlafaxine, lithium, and clozapine) were also classified as treatment drugs. This tendency may reflect the ongoing controversy on the association between suicide and drugs administered to patients with mental health disorders. Some representative studies have reported that SSRIs effectively prevent suicidal risk, whereas others have reported that such drugs potentially increase the suicidal risk [<xref ref-type="bibr" rid="ref64">64</xref>]. Furthermore, medication adherence is an important determining factor for successful pharmacotherapy for mental disorders. To fill this data gap, diverse methods for real-time monitoring of medication adherence using the medical devices have been recently reported [<xref ref-type="bibr" rid="ref65">65</xref>].</p>
        <p>We also evaluated the performance of the R-BERT relation classification model with several pretrained LMs as the embedding layers. After pretraining on PubMed, R-BERT provided a slightly higher relation classification performance on the corpus with BioBERT than with PubMedBERT. This tendency can be explained either by the larger pretraining vocabulary of BioBERT than that of PubMedBERT or the continuous pretraining process of BioBERT from the base LM [<xref ref-type="bibr" rid="ref58">58</xref>] (whereas PubMedBERT was pretrained from scratch). Increasing the pretraining data set and vocabulary increases the diversity of the patterns that a model can learn. The results indicate that BioBERT maintains the base vocabulary during ongoing pretraining and uses the base (Vanilla BERT) weights as the initial weights.</p>
        <p>Concerning the data set properties, the performance was maximized when the data set was split into a 90%:10% training:testing ratio, when the classification scheme was binary, and when the entities were ordered. More importantly, all tested models classified the drug-suicide relationships with <italic>F</italic><sub>1</sub>-score around 0.9 after fine-tuning on our corpus, higher than on the available corpora. For example, Gurulingappa et al [<xref ref-type="bibr" rid="ref21">21</xref>], who dealt with sentence classification, reported an <italic>F</italic><sub>1</sub>-score of 0.70 after training MaxEnt on the MEDLINE corpus. Kim et al [<xref ref-type="bibr" rid="ref66">66</xref>], who dealt with key sentence extraction, trained the BERT classification model on the Drug-Food Interaction corpus of drug and food interactions, obtaining <italic>F</italic><sub>1</sub>-score from 0.506 to 0.738. The varied scopes and sizes of corpora and the different types of classification models preclude a direct comparison of results of this study with those of the previous studies. Nevertheless, this result clearly demonstrates the value of our corpus in NLP tasks.</p>
        <p>These results were obtained through experiments on a specific type of ADE but appear to be applicable to other drug-related adverse events. All nondrug entities were linked to suicide in our research, but the portion of the corpus having the assigned ADE relation can (in theory) be used to investigate drug adverse events not related to suicide. In practice, applying a specific type of ADE to a broader ADE task may decrease the overall performance or change the performances of different LMs. Masking the events in BERT<sub>MTB+EM</sub> [<xref ref-type="bibr" rid="ref47">47</xref>] might reduce the effect of suicide-related bias, but eliminating the bias through event masking is difficult because specific words cueing the suicidal nature of an entity may remain in the context; for example, a sentence with the entities excluded can retain the term “attempted.”</p>
        <p>This corpus is extendible to the development of other NLP systems. For instance, an automatic extraction system accessing our corpus can obtain additional information on the drug-suicide association, such as treatment of suicidal ideation and drugs used in suicide attempts. Our DSR corpus contains sufficient data on the DSR not only for “ADEs” but also for “suicidal treatment” and “means” (14.5% and 10.8% of the corpus, respectively). Moreover, the newly discovered suicide-related entity can complement the existing named entity recognition tools.</p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>There are some drawbacks to this study. First, the ADEs are more narrowly distributed than other relation categories, leading to potential class imbalances when developing relation classification systems using the corpus. To alleviate this problem, we performed downsampling [<xref ref-type="bibr" rid="ref67">67</xref>] and eliminated the sentence duplicates before applying the relation classification model to various data sets generated from our corpus. We expect this treatment to offset the negative effects of the class imbalance. Fully resolving the class imbalance is beyond the scope of this work but should be addressed in future work. For the same reasons, we did not explore the noisy “Miscellaneous” class, which reveals little information on DSRs. The “Miscellaneous” class is also worthy of investigation in future studies. Note that this work concentrated on building the data set and assessing its suitability in performance evaluations. Moreover, we restricted the sentence length to 512 characters (the upper limit of BERT encoding), but this restriction could be relaxed for NLP tasks that do not use BERT. This study excluded overlaps between drugs and suicidal events. Finally, because this corpus was created solely from academic literature, its scope may not extend to social media.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>Extracted from research articles, the DSR corpus developed in this study is the largest and most comprehensive corpus for drug-suicide entities and their relations (<xref ref-type="supplementary-material" rid="app5">Multimedia Appendix 5</xref>). After confirming the consistency of the annotations in the DSR corpus, we applied a new approach for reducing the load of manual annotations. When fine-tuned on our corpus, all R-BERT models achieved competitive performance with <italic>F</italic><sub>1</sub>-scores above or only slightly below 0.9. We believe that our corpus can be widely used for developing automatic information extraction systems and for activating relevant research on DSRs.</p>
        <p>In the future, we plan to expand the data set by revising ambiguous cases and diversifying the ADE class into 6 subclasses [<xref ref-type="bibr" rid="ref68">68</xref>]. We will also cover colloquial text sources from Twitter and other social media sites.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Sentences with suicidal entities from the MEDLINE adverse drug event data set.</p>
        <media xlink:href="jmir_v25i1e41100_app1.xlsx" xlink:title="XLSX File  (Microsoft Excel File), 10 KB"/>
      </supplementary-material>
      <supplementary-material id="app2">
        <label>Multimedia Appendix 2</label>
        <p>Comparison with the previous corpus with adverse drug event (ADE) annotations.</p>
        <media xlink:href="jmir_v25i1e41100_app2.docx" xlink:title="DOCX File, 15 KB"/>
      </supplementary-material>
      <supplementary-material id="app3">
        <label>Multimedia Appendix 3</label>
        <p>Annotation guidelines of the drug-suicide relation corpus.</p>
        <media xlink:href="jmir_v25i1e41100_app3.pdf" xlink:title="PDF File  (Adobe PDF File), 93 KB"/>
      </supplementary-material>
      <supplementary-material id="app4">
        <label>Multimedia Appendix 4</label>
        <p><italic>F</italic><sub>1</sub> score, precision, and recall results of the R-BERT (Bidirectional Encoder Representations from Transformers) model differentiated by embedding-layer language models and fine-tuning data sets.</p>
        <media xlink:href="jmir_v25i1e41100_app4.xlsx" xlink:title="XLSX File  (Microsoft Excel File), 52 KB"/>
      </supplementary-material>
      <supplementary-material id="app5">
        <label>Multimedia Appendix 5</label>
        <p>Drug-suicide relation corpus.</p>
        <media xlink:href="jmir_v25i1e41100_app5.xlsx" xlink:title="XLSX File  (Microsoft Excel File), 1077 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">ADE</term>
          <def>
            <p>adverse drug event</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">BART</term>
          <def>
            <p>Bidirectional and Auto-Regressive Transformers</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">BERT</term>
          <def>
            <p>Bidirectional Encoder Representations from Transformers</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">DSR</term>
          <def>
            <p>drug-suicide relation</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">LM</term>
          <def>
            <p>language model</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">MeSH</term>
          <def>
            <p>Medical Subject Headings</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">NLP</term>
          <def>
            <p>natural language processing</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">SSRI</term>
          <def>
            <p>selective serotonin reuptake inhibitor</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>This research was supported by the Basic Science Research Program through the National Research Foundation of Korea (NRF) funded by the Ministry of Education (NRF-2022R1I1A1A01065589 and NRF-2018R1A6A1A03025109).</p>
    </ack>
    <fn-group>
      <fn fn-type="con">
        <p>KK and SMJ contributed equally as first authors. J-WK and Y-KS contributed equally as the corresponding authors. All authors contributed to the conceptualization of the study.</p>
        <p>KK conceptualized the workflow, conducted the data collection and experiments, and developed and edited annotation guidelines and the original manuscript. SMJ developed and edited the annotation guidelines, supervised the annotation process, interpreted the analyses, and wrote and edited the original manuscript. J-WK and Y-KS interpreted the analyses, edited the manuscript, and coordinated the project. All authors approved the final version of the manuscript.</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="web">
          <source>Comprehensive Mental Health Action Plan 2013-2030</source>
          <year>2021</year>
          <month>9</month>
          <day>21</day>
          <access-date>2022-10-11</access-date>
          <publisher-loc>Geneva</publisher-loc>
          <publisher-name>World Health Organization</publisher-name>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.who.int/publications/i/item/9789240031029">https://www.who.int/publications/i/item/9789240031029</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hawton</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>van Heeringen</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Suicide</article-title>
          <source>Lancet</source>
          <year>2009</year>
          <month>04</month>
          <volume>373</volume>
          <issue>9672</issue>
          <fpage>1372</fpage>
          <lpage>81</lpage>
          <pub-id pub-id-type="doi">10.1016/s0140-6736(09)60372-x</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wasserman</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Rihmer</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Rujescu</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Sarchiapone</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Sokolowski</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Titelman</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Zalsman</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Zemishlany</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Carli</surname>
              <given-names>V</given-names>
            </name>
            <collab>European Psychiatric Association</collab>
          </person-group>
          <article-title>The European Psychiatric Association (EPA) guidance on suicide treatment and prevention</article-title>
          <source>Eur Psychiatry</source>
          <year>2012</year>
          <month>02</month>
          <day>15</day>
          <volume>27</volume>
          <issue>2</issue>
          <fpage>129</fpage>
          <lpage>41</lpage>
          <pub-id pub-id-type="doi">10.1016/j.eurpsy.2011.06.003</pub-id>
          <pub-id pub-id-type="medline">22137775</pub-id>
          <pub-id pub-id-type="pii">S0924-9338(11)00108-8</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Du</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Shi</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Jin</surname>
              <given-names>X</given-names>
            </name>
            <collab>Yan-Qian</collab>
            <name name-style="western">
              <surname>Fu</surname>
              <given-names>X-L</given-names>
            </name>
            <name name-style="western">
              <surname>Song</surname>
              <given-names>Y-P</given-names>
            </name>
            <name name-style="western">
              <surname>Cai</surname>
              <given-names>J-Y</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>H-L</given-names>
            </name>
          </person-group>
          <article-title>Incidence of suicide death in patients with cancer: a systematic review and meta-analysis</article-title>
          <source>J Affect Disord</source>
          <year>2020</year>
          <month>11</month>
          <day>01</day>
          <volume>276</volume>
          <fpage>711</fpage>
          <lpage>9</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1016/j.jad.2020.07.082"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jad.2020.07.082</pub-id>
          <pub-id pub-id-type="medline">32794450</pub-id>
          <pub-id pub-id-type="pii">S0165-0327(20)32527-1</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bolton</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Walld</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Chateau</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Finlayson</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Sareen</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Risk of suicide and suicide attempts associated with physical disorders: a population-based, balancing score-matched analysis</article-title>
          <source>Psychol Med</source>
          <year>2014</year>
          <month>07</month>
          <day>17</day>
          <volume>45</volume>
          <issue>3</issue>
          <fpage>495</fpage>
          <lpage>504</lpage>
          <pub-id pub-id-type="doi">10.1017/s0033291714001639</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Simon</surname>
              <given-names>GE</given-names>
            </name>
            <name name-style="western">
              <surname>Savarino</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Operskalski</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>PS</given-names>
            </name>
          </person-group>
          <article-title>Suicide risk during antidepressant treatment</article-title>
          <source>Am J Psychiatry</source>
          <year>2006</year>
          <month>01</month>
          <volume>163</volume>
          <issue>1</issue>
          <fpage>41</fpage>
          <lpage>7</lpage>
          <pub-id pub-id-type="doi">10.1176/appi.ajp.163.1.41</pub-id>
          <pub-id pub-id-type="medline">16390887</pub-id>
          <pub-id pub-id-type="pii">163/1/41</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Thomas</surname>
              <given-names>KH</given-names>
            </name>
            <name name-style="western">
              <surname>Martin</surname>
              <given-names>RM</given-names>
            </name>
            <name name-style="western">
              <surname>Knipe</surname>
              <given-names>DW</given-names>
            </name>
            <name name-style="western">
              <surname>Higgins</surname>
              <given-names>JPT</given-names>
            </name>
            <name name-style="western">
              <surname>Gunnell</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Risk of neuropsychiatric adverse events associated with varenicline: systematic review and meta-analysis</article-title>
          <source>BMJ</source>
          <year>2015</year>
          <month>03</month>
          <day>12</day>
          <volume>350</volume>
          <issue>mar12 8</issue>
          <fpage>h1109</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.bmj.com/lookup/pmidlookup?view=long&#38;pmid=25767129"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/bmj.h1109</pub-id>
          <pub-id pub-id-type="medline">25767129</pub-id>
          <pub-id pub-id-type="pmcid">PMC4357491</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gorton</surname>
              <given-names>HC</given-names>
            </name>
            <name name-style="western">
              <surname>Webb</surname>
              <given-names>RT</given-names>
            </name>
            <name name-style="western">
              <surname>Kapur</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Ashcroft</surname>
              <given-names>DM</given-names>
            </name>
          </person-group>
          <article-title>Non-psychotropic medication and risk of suicide or attempted suicide: a systematic review</article-title>
          <source>BMJ Open</source>
          <year>2016</year>
          <month>01</month>
          <day>13</day>
          <volume>6</volume>
          <issue>1</issue>
          <fpage>e009074</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmjopen.bmj.com/lookup/pmidlookup?view=long&#38;pmid=26769782"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/bmjopen-2015-009074</pub-id>
          <pub-id pub-id-type="medline">26769782</pub-id>
          <pub-id pub-id-type="pii">bmjopen-2015-009074</pub-id>
          <pub-id pub-id-type="pmcid">PMC4735210</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Stone</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Laughren</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Jones</surname>
              <given-names>ML</given-names>
            </name>
            <name name-style="western">
              <surname>Levenson</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Holland</surname>
              <given-names>PC</given-names>
            </name>
            <name name-style="western">
              <surname>Hughes</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Hammad</surname>
              <given-names>TA</given-names>
            </name>
            <name name-style="western">
              <surname>Temple</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Rochester</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Risk of suicidality in clinical trials of antidepressants in adults: analysis of proprietary data submitted to US Food and Drug Administration</article-title>
          <source>BMJ</source>
          <year>2009</year>
          <month>08</month>
          <day>11</day>
          <volume>339</volume>
          <fpage>b2880</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/19671933"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/bmj.b2880</pub-id>
          <pub-id pub-id-type="medline">19671933</pub-id>
          <pub-id pub-id-type="pii">bmj.b2880</pub-id>
          <pub-id pub-id-type="pmcid">PMC2725270</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hammad</surname>
              <given-names>TA</given-names>
            </name>
            <name name-style="western">
              <surname>Laughren</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Racoosin</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Suicidality in pediatric patients treated with antidepressant drugs</article-title>
          <source>Arch Gen Psychiatry</source>
          <year>2006</year>
          <month>03</month>
          <day>01</day>
          <volume>63</volume>
          <issue>3</issue>
          <fpage>332</fpage>
          <lpage>9</lpage>
          <pub-id pub-id-type="doi">10.1001/archpsyc.63.3.332</pub-id>
          <pub-id pub-id-type="medline">16520440</pub-id>
          <pub-id pub-id-type="pii">63/3/332</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Qato</surname>
              <given-names>DM</given-names>
            </name>
            <name name-style="western">
              <surname>Ozenberger</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Olfson</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Prevalence of prescription medications with depression as a potential adverse effect among adults in the United States</article-title>
          <source>JAMA</source>
          <year>2018</year>
          <month>06</month>
          <day>12</day>
          <volume>319</volume>
          <issue>22</issue>
          <fpage>2289</fpage>
          <lpage>98</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/29896627"/>
          </comment>
          <pub-id pub-id-type="doi">10.1001/jama.2018.6741</pub-id>
          <pub-id pub-id-type="medline">29896627</pub-id>
          <pub-id pub-id-type="pii">2684607</pub-id>
          <pub-id pub-id-type="pmcid">PMC6583503</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Grapow</surname>
              <given-names>MT</given-names>
            </name>
            <name name-style="western">
              <surname>von Wattenwyl</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Guller</surname>
              <given-names>U</given-names>
            </name>
            <name name-style="western">
              <surname>Beyersdorf</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Zerkowski</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Randomized controlled trials do not reflect reality: real-world analyses are critical for treatment guidelines!</article-title>
          <source>J Thorac Cardiovasc Surg</source>
          <year>2006</year>
          <month>07</month>
          <volume>132</volume>
          <issue>1</issue>
          <fpage>5</fpage>
          <lpage>7</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S0022-5223(06)00504-6"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jtcvs.2006.03.035</pub-id>
          <pub-id pub-id-type="medline">16798293</pub-id>
          <pub-id pub-id-type="pii">S0022-5223(06)00504-6</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Phillips</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Hazell</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Sauzet</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Cornelius</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>Analysis and reporting of adverse events in randomised controlled trials: a review</article-title>
          <source>BMJ Open</source>
          <year>2019</year>
          <month>03</month>
          <day>01</day>
          <volume>9</volume>
          <issue>2</issue>
          <fpage>e024537</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmjopen.bmj.com/lookup/pmidlookup?view=long&#38;pmid=30826796"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/bmjopen-2018-024537</pub-id>
          <pub-id pub-id-type="medline">30826796</pub-id>
          <pub-id pub-id-type="pii">bmjopen-2018-024537</pub-id>
          <pub-id pub-id-type="pmcid">PMC6398660</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jureidini</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>McHenry</surname>
              <given-names>LB</given-names>
            </name>
          </person-group>
          <article-title>The Illusion of Evidence-Based Medicine: Exposing the Crisis of Credibility in Clinical Research</article-title>
          <source>BMJ</source>
          <year>2020</year>
          <month>03</month>
          <day>16</day>
          <fpage>o702</fpage>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>van Mulligen</surname>
              <given-names>EM</given-names>
            </name>
            <name name-style="western">
              <surname>Fourrier-Reglat</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Gurwitz</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Molokhia</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Nieto</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Trifiro</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Kors</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Furlong</surname>
              <given-names>LI</given-names>
            </name>
          </person-group>
          <article-title>The EU-ADR corpus: annotated drugs, diseases, targets, and their relationships</article-title>
          <source>J Biomed Inform</source>
          <year>2012</year>
          <month>10</month>
          <volume>45</volume>
          <issue>5</issue>
          <fpage>879</fpage>
          <lpage>84</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(12)00057-3"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2012.04.004</pub-id>
          <pub-id pub-id-type="medline">22554700</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(12)00057-3</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Schulz</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Ševa</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Rodriguez</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Ostendorff</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Rehm</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Named entities in medical case reports: corpus and experiments</article-title>
          <source>arXiv</source>
          <year>2020</year>
          <month>3</month>
          <day>29</day>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2003.13032"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jain</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Agrawal</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Saporta</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Truong</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Duong</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Bui</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Chambon</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Lungren</surname>
              <given-names>MP</given-names>
            </name>
            <name name-style="western">
              <surname>Ng</surname>
              <given-names>AY</given-names>
            </name>
            <name name-style="western">
              <surname>Langlotz</surname>
              <given-names>CP</given-names>
            </name>
            <name name-style="western">
              <surname>Rajpurkar</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>RadGraph: extracting clinical entities and relations from radiology reports</article-title>
          <source>arXiv</source>
          <year>2021</year>
          <month>6</month>
          <day>28</day>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2106.14463"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lai</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Kuo</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Chung</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Han</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Tsai</surname>
              <given-names>RT</given-names>
            </name>
            <name name-style="western">
              <surname>Horng</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Using a large margin context-aware convolutional neural network to automatically extract disease-disease association from literature: comparative analytic study</article-title>
          <source>JMIR Med Inform</source>
          <year>2019</year>
          <month>11</month>
          <day>26</day>
          <volume>7</volume>
          <issue>4</issue>
          <fpage>e14502</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medinform.jmir.org/2019/4/e14502/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/14502</pub-id>
          <pub-id pub-id-type="medline">31769759</pub-id>
          <pub-id pub-id-type="pii">v7i4e14502</pub-id>
          <pub-id pub-id-type="pmcid">PMC6913619</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Herrero-Zazo</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Segura-Bedmar</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Martínez</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Declerck</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>The DDI corpus: an annotated corpus with pharmacological substances and drug-drug interactions</article-title>
          <source>J Biomed Inform</source>
          <year>2013</year>
          <month>10</month>
          <volume>46</volume>
          <issue>5</issue>
          <fpage>914</fpage>
          <lpage>20</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(13)00112-3"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2013.07.011</pub-id>
          <pub-id pub-id-type="medline">23906817</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(13)00112-3</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Li</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Johnson</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Sciaky</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Wei</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Leaman</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Davis</surname>
              <given-names>AP</given-names>
            </name>
            <name name-style="western">
              <surname>Mattingly</surname>
              <given-names>CJ</given-names>
            </name>
            <name name-style="western">
              <surname>Wiegers</surname>
              <given-names>TC</given-names>
            </name>
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>Z</given-names>
            </name>
          </person-group>
          <article-title>BioCreative V CDR task corpus: a resource for chemical disease relation extraction</article-title>
          <source>Database (Oxford)</source>
          <year>2016</year>
          <volume>2016</volume>
          <fpage>baw068</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/27161011"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/database/baw068</pub-id>
          <pub-id pub-id-type="medline">27161011</pub-id>
          <pub-id pub-id-type="pii">baw068</pub-id>
          <pub-id pub-id-type="pmcid">PMC4860626</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gurulingappa</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Rajput</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Roberts</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Fluck</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Hofmann-Apitius</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Toldo</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Development of a benchmark corpus to support the automatic extraction of drug-related adverse effects from medical case reports</article-title>
          <source>J Biomed Inform</source>
          <year>2012</year>
          <month>10</month>
          <volume>45</volume>
          <issue>5</issue>
          <fpage>885</fpage>
          <lpage>92</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(12)00061-5"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2012.04.008</pub-id>
          <pub-id pub-id-type="medline">22554702</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(12)00061-5</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Alvaro</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Miyao</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Collier</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>TwiMed: Twitter and PubMed comparable corpus of drugs, diseases, symptoms, and their relations</article-title>
          <source>JMIR Public Health Surveill</source>
          <year>2017</year>
          <month>05</month>
          <day>03</day>
          <volume>3</volume>
          <issue>2</issue>
          <fpage>e24</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://publichealth.jmir.org/2017/2/e24/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/publichealth.6396</pub-id>
          <pub-id pub-id-type="medline">28468748</pub-id>
          <pub-id pub-id-type="pii">v3i2e24</pub-id>
          <pub-id pub-id-type="pmcid">PMC5438461</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nikfarjam</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Sarker</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>O'Connor</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Ginn</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Gonzalez</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Pharmacovigilance from social media: mining adverse drug reaction mentions using sequence labeling with word embedding cluster features</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2015</year>
          <month>05</month>
          <volume>22</volume>
          <issue>3</issue>
          <fpage>671</fpage>
          <lpage>81</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/25755127"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamia/ocu041</pub-id>
          <pub-id pub-id-type="medline">25755127</pub-id>
          <pub-id pub-id-type="pii">ocu041</pub-id>
          <pub-id pub-id-type="pmcid">PMC4457113</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sawhney</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Manchanda</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Singh</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Aggarwal</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>A computational approach to feature extraction for identification of suicidal ideation in Tweets</article-title>
          <source>Proceedings of ACL 2018, Student Research Workshop</source>
          <year>2018</year>
          <conf-name>ACL 2018, Student Research Workshop</conf-name>
          <conf-date>Jul, 2018</conf-date>
          <conf-loc>Melbourne, Australia</conf-loc>
          <pub-id pub-id-type="doi">10.18653/v1/p18-3013</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>O'Dea</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Wan</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Batterham</surname>
              <given-names>PJ</given-names>
            </name>
            <name name-style="western">
              <surname>Calear</surname>
              <given-names>AL</given-names>
            </name>
            <name name-style="western">
              <surname>Paris</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Christensen</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Detecting suicidality on Twitter</article-title>
          <source>Internet Intervent</source>
          <year>2015</year>
          <month>05</month>
          <volume>2</volume>
          <issue>2</issue>
          <fpage>183</fpage>
          <lpage>8</lpage>
          <pub-id pub-id-type="doi">10.1016/j.invent.2015.03.005</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Cong</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Cuzzocrea</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Social big data: mining, applications, and beyond</article-title>
          <source>Complexity</source>
          <year>2019</year>
          <month>1</month>
          <fpage>1</fpage>
          <lpage>2</lpage>
          <pub-id pub-id-type="doi">10.1155/2019/2059075</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lippincott</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Ó Séaghdha</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Korhonen</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Exploring subdomain variation in biomedical language</article-title>
          <source>BMC Bioinformatics</source>
          <year>2011</year>
          <month>05</month>
          <day>27</day>
          <volume>12</volume>
          <issue>1</issue>
          <fpage>212</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-12-212"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/1471-2105-12-212</pub-id>
          <pub-id pub-id-type="medline">21619603</pub-id>
          <pub-id pub-id-type="pii">1471-2105-12-212</pub-id>
          <pub-id pub-id-type="pmcid">PMC3118171</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Baldwin</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Cook</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Lui</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>MacKinlay</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>How noisy social media text, how diffrnt social media sources?</article-title>
          <source>Proceedings of the Sixth International Joint Conference on Natural Language Processing</source>
          <year>2013</year>
          <conf-name>Sixth International Joint Conference on Natural Language Processing</conf-name>
          <conf-date>Oct 14-18, 2013</conf-date>
          <conf-loc>Nagoya, Japan</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="web">
          <article-title>Getting better together</article-title>
          <source>Daily Strength</source>
          <access-date>2022-11-01</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.dailystrength.org/">http://www.dailystrength.org/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Frandsen</surname>
              <given-names>TF</given-names>
            </name>
            <name name-style="western">
              <surname>Eriksen</surname>
              <given-names>MB</given-names>
            </name>
            <name name-style="western">
              <surname>Hammer</surname>
              <given-names>DM</given-names>
            </name>
            <name name-style="western">
              <surname>Christensen</surname>
              <given-names>JB</given-names>
            </name>
          </person-group>
          <article-title>PubMed coverage varied across specialties and over time: a large-scale study of included studies in Cochrane reviews</article-title>
          <source>J Clin Epidemiol</source>
          <year>2019</year>
          <month>08</month>
          <volume>112</volume>
          <fpage>59</fpage>
          <lpage>66</lpage>
          <pub-id pub-id-type="doi">10.1016/j.jclinepi.2019.04.015</pub-id>
          <pub-id pub-id-type="medline">31051247</pub-id>
          <pub-id pub-id-type="pii">S0895-4356(19)30083-6</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lipscomb</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Medical Subject Headings (MeSH)</article-title>
          <source>Bull Med Libr Assoc</source>
          <year>2000</year>
          <month>07</month>
          <volume>88</volume>
          <issue>3</issue>
          <fpage>265</fpage>
          <lpage>6</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/10928714"/>
          </comment>
          <pub-id pub-id-type="medline">10928714</pub-id>
          <pub-id pub-id-type="pmcid">PMC35238</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wishart</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Feunang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Guo</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Lo</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Marcu</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Grant</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Sajed</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Johnson</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Sayeeda</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Assempour</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Iynkkaran</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Maciejewski</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Gale</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Wilson</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Chin</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Cummings</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Le</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Pon</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Knox</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Wilson</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>DrugBank 5.0: a major update to the DrugBank database for 2018</article-title>
          <source>Nucleic Acids Res</source>
          <year>2018</year>
          <month>01</month>
          <day>04</day>
          <volume>46</volume>
          <issue>D1</issue>
          <fpage>D1074</fpage>
          <lpage>82</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/29126136"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/nar/gkx1037</pub-id>
          <pub-id pub-id-type="medline">29126136</pub-id>
          <pub-id pub-id-type="pii">4602867</pub-id>
          <pub-id pub-id-type="pmcid">PMC5753335</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wishart</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Knox</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Guo</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Cheng</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Shrivastava</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Tzur</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Gautam</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Hassanali</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>DrugBank: a knowledgebase for drugs, drug actions and drug targets</article-title>
          <source>Nucleic Acids Res</source>
          <year>2008</year>
          <month>01</month>
          <volume>36</volume>
          <issue>Database issue</issue>
          <fpage>D901</fpage>
          <lpage>6</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/18048412"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/nar/gkm958</pub-id>
          <pub-id pub-id-type="medline">18048412</pub-id>
          <pub-id pub-id-type="pii">gkm958</pub-id>
          <pub-id pub-id-type="pmcid">PMC2238889</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nahler</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Anatomical therapeutic chemical classification system (ATC)</article-title>
          <source>Dictionary of Pharmaceutical Medicine</source>
          <year>2009</year>
          <publisher-loc>Vienna, Austria</publisher-loc>
          <publisher-name>Springer</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="web">
          <article-title>PyMed is a Python library that provides access to PubMed</article-title>
          <source>GitHub</source>
          <year>2018</year>
          <access-date>2021-01-01</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://github.com/gijswobben/pymed">https://github.com/gijswobben/pymed</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bird</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Klein</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Loper</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <source>Natural Language Processing with Python</source>
          <year>2009</year>
          <publisher-loc>Sebastopol, California, United States</publisher-loc>
          <publisher-name>O'Reilly Media</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yin</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Hay</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Roth</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Benchmarking zero-shot text classification: datasets, evaluation and entailment approach</article-title>
          <source>Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP)</source>
          <year>2019</year>
          <conf-name>2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP)</conf-name>
          <conf-date>Nov 3-7, 2019</conf-date>
          <conf-loc>Hong Kong, China</conf-loc>
          <pub-id pub-id-type="doi">10.18653/v1/d19-1404</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="web">
          <article-title>bart-large-mnli</article-title>
          <source>Hugging Face</source>
          <access-date>2022-09-26</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://huggingface.co/facebook/bart-large-mnli">https://huggingface.co/facebook/bart-large-mnli</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Williams</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Nangia</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Bowman</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>A broad-coverage challenge corpus for sentence understanding through inference</article-title>
          <source>Proceedings of the 2018 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long Papers)</source>
          <year>2018</year>
          <conf-name>2018 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long Papers)</conf-name>
          <conf-date>Jun 1-6, 2018</conf-date>
          <conf-loc>New Orleans, Louisiana</conf-loc>
          <pub-id pub-id-type="doi">10.18653/v1/n18-1101</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kormilitzin</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Vaci</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Nevado-Holgado</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Med7: a transferable clinical natural language processing model for electronic health records</article-title>
          <source>Artif Intell Med</source>
          <year>2021</year>
          <month>08</month>
          <volume>118</volume>
          <fpage>102086</fpage>
          <pub-id pub-id-type="doi">10.1016/j.artmed.2021.102086</pub-id>
          <pub-id pub-id-type="medline">34412834</pub-id>
          <pub-id pub-id-type="pii">S0933-3657(21)00079-8</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nakayama</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Kubo</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Kamura</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Taniguchi</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Liang</surname>
              <given-names>X</given-names>
            </name>
          </person-group>
          <article-title>doccano: text annotation tool for human</article-title>
          <source>GitHub</source>
          <year>2018</year>
          <access-date>2022-10-20</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://github.com/doccano/doccano">https://github.com/doccano/doccano</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cohen</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>A coefficient of agreement for nominal scales</article-title>
          <source>Educ Psychological Measure</source>
          <year>1960</year>
          <month>04</month>
          <volume>20</volume>
          <issue>1</issue>
          <fpage>37</fpage>
          <lpage>46</lpage>
          <pub-id pub-id-type="doi">10.1177/001316446002000104</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Artstein</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Poesio</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Inter-coder agreement for computational linguistics</article-title>
          <source>Computational Linguistics</source>
          <year>2008</year>
          <month>12</month>
          <volume>34</volume>
          <issue>4</issue>
          <fpage>555</fpage>
          <lpage>96</lpage>
          <pub-id pub-id-type="doi">10.1162/coli.07-034-R2</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref44">
        <label>44</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Viera</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Garrett</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Understanding interobserver agreement: the kappa statistic</article-title>
          <source>Fam Med</source>
          <year>2005</year>
          <month>05</month>
          <volume>37</volume>
          <issue>5</issue>
          <fpage>360</fpage>
          <lpage>3</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.stfm.org/fmhub/fm2005/May/Anthony360.pdf"/>
          </comment>
          <pub-id pub-id-type="medline">15883903</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref45">
        <label>45</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Devlin</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Toutanova</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>BERT: pre-training of deep bidirectional transformers for language understanding</article-title>
          <source>arXiv</source>
          <year>2019</year>
          <month>5</month>
          <day>24</day>
        </nlm-citation>
      </ref>
      <ref id="ref46">
        <label>46</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>He</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Enriching pre-trained language model with entity information for relation classification</article-title>
          <source>arXiv</source>
          <year>2019</year>
          <pub-id pub-id-type="doi">10.48550/arXiv.1905.08284</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref47">
        <label>47</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Soares</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>FitzGerald</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Ling</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Kwiatkowski</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Matching the blanks: distributional similarity for relation learning</article-title>
          <source>arXiv</source>
          <year>2019</year>
          <month>6</month>
          <day>7</day>
          <pub-id pub-id-type="doi">10.48550/arXiv.1906.03158</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref48">
        <label>48</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yamada</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Asai</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Shindo</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Takeda</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Matsumoto</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>LUKE: deep contextualized entity representations with entity-aware self-attention</article-title>
          <source>Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)</source>
          <year>2020</year>
          <conf-name>2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)</conf-name>
          <conf-date>Nov 16-20, 2020</conf-date>
          <conf-loc>Online</conf-loc>
          <pub-id pub-id-type="doi">10.18653/v1/2020.emnlp-main.523</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref49">
        <label>49</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gao</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Han</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>Z</given-names>
            </name>
          </person-group>
          <article-title>FewRel 2.0: towards more challenging few-shot relation classification</article-title>
          <source>Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP)</source>
          <year>2019</year>
          <conf-name>2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP)</conf-name>
          <conf-date>Nov 3–7, 2019</conf-date>
          <conf-loc>Hong Kong, China</conf-loc>
          <pub-id pub-id-type="doi">10.18653/v1/d19-1649</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref50">
        <label>50</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Joshi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Weld</surname>
              <given-names>DS</given-names>
            </name>
            <name name-style="western">
              <surname>Zettlemoyer</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Levy</surname>
              <given-names>O</given-names>
            </name>
          </person-group>
          <article-title>SpanBERT: improving pre-training by representing and predicting spans</article-title>
          <source>Transact Assoc Computational Linguistics</source>
          <year>2020</year>
          <month>12</month>
          <volume>8</volume>
          <fpage>64</fpage>
          <lpage>77</lpage>
          <pub-id pub-id-type="doi">10.1162/tacl_a_00300</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref51">
        <label>51</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Gao</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>KEPLER: a unified model for knowledge embedding and pre-trained language representation</article-title>
          <source>Transact Assoc Computational Linguistics</source>
          <year>2021</year>
          <volume>9</volume>
          <fpage>176</fpage>
          <lpage>94</lpage>
          <pub-id pub-id-type="doi">10.1162/tacl_a_00360</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref52">
        <label>52</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Tang</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Duan</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Wei</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Ji</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Cao</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>K-adapter: infusing knowledge into pre-trained models with adapters</article-title>
          <source>arXiv</source>
          <year>2020</year>
          <month>12</month>
          <day>28</day>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2002.01808"/>
          </comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.2002.01808</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref53">
        <label>53</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Ott</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Goyal</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Du</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Joshi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Levy</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Lewis</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Zettlemoyer</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Stoyanov</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>RoBERTa: a robustly optimized BERT pretraining approach</article-title>
          <source>arXiv</source>
          <year>2019</year>
          <month>7</month>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1907.11692"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref54">
        <label>54</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>López</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Fernández</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>García</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Palade</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Herrera</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>An insight into classification with imbalanced data: empirical results and current trends on using data intrinsic characteristics</article-title>
          <source>Inform Sci</source>
          <year>2013</year>
          <month>11</month>
          <volume>250</volume>
          <fpage>113</fpage>
          <lpage>41</lpage>
          <pub-id pub-id-type="doi">10.1016/j.ins.2013.07.007</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref55">
        <label>55</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Han</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Peng</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Han</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Cui</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Distantly supervised relation extraction via recursive hierarchy-interactive attention and entity-order perception</article-title>
          <source>Neural Netw</source>
          <year>2022</year>
          <month>08</month>
          <volume>152</volume>
          <fpage>191</fpage>
          <lpage>200</lpage>
          <pub-id pub-id-type="doi">10.1016/j.neunet.2022.04.019</pub-id>
          <pub-id pub-id-type="medline">35533505</pub-id>
          <pub-id pub-id-type="pii">S0893-6080(22)00152-6</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref56">
        <label>56</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sabo</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Elazar</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Goldberg</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Dagan</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>Revisiting few-shot relation classification: evaluation data and classification schemes</article-title>
          <source>Transact Assoc Computational Linguistics</source>
          <year>2021</year>
          <month>8</month>
          <day>2</day>
          <volume>9</volume>
          <fpage>691</fpage>
          <lpage>706</lpage>
          <pub-id pub-id-type="doi">10.1162/tacl_a_00392</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref57">
        <label>57</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Yoon</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>So</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Kang</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>BioBERT: a pre-trained biomedical language representation model for biomedical text mining</article-title>
          <source>Bioinformatics</source>
          <year>2020</year>
          <month>02</month>
          <day>15</day>
          <volume>36</volume>
          <issue>4</issue>
          <fpage>1234</fpage>
          <lpage>40</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/31501885"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/bioinformatics/btz682</pub-id>
          <pub-id pub-id-type="medline">31501885</pub-id>
          <pub-id pub-id-type="pii">5566506</pub-id>
          <pub-id pub-id-type="pmcid">PMC7703786</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref58">
        <label>58</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Tinn</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Cheng</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Lucas</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Usuyama</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Naumann</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Gao</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Poon</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Domain-specific language model pretraining for biomedical natural language processing</article-title>
          <source>ACM Trans Comput Healthcare</source>
          <year>2022</year>
          <month>01</month>
          <day>31</day>
          <volume>3</volume>
          <issue>1</issue>
          <fpage>1</fpage>
          <lpage>23</lpage>
          <pub-id pub-id-type="doi">10.1145/3458754</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref59">
        <label>59</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Alsentzer</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Murphy</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Boag</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Weng</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Jindi</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Naumann</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>McDermott</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Publicly available clinical BERT embeddings</article-title>
          <source>Proceedings of the 2nd Clinical Natural Language Processing Workshop</source>
          <year>2019</year>
          <conf-name>2nd Clinical Natural Language Processing Workshop</conf-name>
          <conf-date>Jun 7, 2019</conf-date>
          <conf-loc>Minneapolis, Minnesota, USA</conf-loc>
          <pub-id pub-id-type="doi">10.18653/v1/w19-1909</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref60">
        <label>60</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Beltagy</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Lo</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Cohan</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>SciBERT: a pretrained language model for scientific text</article-title>
          <source>Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP)</source>
          <year>2019</year>
          <conf-name>2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP)</conf-name>
          <conf-date>Nov 3–7, 2019</conf-date>
          <conf-loc>Hong Kong, China</conf-loc>
          <pub-id pub-id-type="doi">10.18653/v1/d19-1371</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref61">
        <label>61</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pedregosa</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Varoquaux</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Gramfort</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Michel</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Thirion</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Grisel</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Blondel</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Prettenhofer</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Weiss</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Dubourg</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Vanderplas</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Passos</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Cournapeau</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Brucher</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Perrot</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Duchesnay</surname>
              <given-names>É</given-names>
            </name>
          </person-group>
          <article-title>Scikit-learn: machine learning in Python</article-title>
          <source>J Mach Learn Res</source>
          <year>2011</year>
          <volume>12</volume>
          <fpage>2825</fpage>
          <lpage>30</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref62">
        <label>62</label>
        <nlm-citation citation-type="web">
          <article-title>PubMed® online training</article-title>
          <source>NIH National Library of Medicine</source>
          <year>2018</year>
          <access-date>2022-10-16</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://learn.nlm.nih.gov/documentation/training-packets/T0042010P/">https://learn.nlm.nih.gov/documentation/training-packets/T0042010P/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref63">
        <label>63</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>DeMars</surname>
              <given-names>MM</given-names>
            </name>
            <name name-style="western">
              <surname>Perruso</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>MeSH and text-word search strategies: precision, recall, and their implications for library instruction</article-title>
          <source>J Med Libr Assoc</source>
          <year>2022</year>
          <month>01</month>
          <day>01</day>
          <volume>110</volume>
          <issue>1</issue>
          <fpage>23</fpage>
          <lpage>33</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/35210959"/>
          </comment>
          <pub-id pub-id-type="doi">10.5195/jmla.2022.1283</pub-id>
          <pub-id pub-id-type="medline">35210959</pub-id>
          <pub-id pub-id-type="pii">jmla.2022.1283</pub-id>
          <pub-id pub-id-type="pmcid">PMC8830400</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref64">
        <label>64</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sharma</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Guski</surname>
              <given-names>LS</given-names>
            </name>
            <name name-style="western">
              <surname>Freund</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Gøtzsche</surname>
              <given-names>PC</given-names>
            </name>
          </person-group>
          <article-title>Suicidality and aggression during antidepressant treatment: systematic review and meta-analyses based on clinical study reports</article-title>
          <source>BMJ</source>
          <year>2016</year>
          <month>01</month>
          <day>27</day>
          <volume>352</volume>
          <fpage>i65</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.bmj.com/lookup/pmidlookup?view=long&#38;pmid=26819231"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/bmj.i65</pub-id>
          <pub-id pub-id-type="medline">26819231</pub-id>
          <pub-id pub-id-type="pmcid">PMC4729837</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref65">
        <label>65</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Miley</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Machado</surname>
              <given-names>LB</given-names>
            </name>
            <name name-style="western">
              <surname>Condo</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Jergens</surname>
              <given-names>AE</given-names>
            </name>
            <name name-style="western">
              <surname>Yoon</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Pandey</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Video capsule endoscopy and ingestible electronics: emerging trends in sensors, circuits, materials, telemetry, optics, and rapid reading software</article-title>
          <source>arXiv</source>
          <year>2022</year>
          <month>05</month>
          <day>24</day>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2205.11751"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref66">
        <label>66</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Choi</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Won</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Mi Oh</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>An annotated corpus from biomedical articles to construct a drug-food interaction database</article-title>
          <source>J Biomed Inform</source>
          <year>2022</year>
          <month>02</month>
          <volume>126</volume>
          <fpage>103985</fpage>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2022.103985</pub-id>
          <pub-id pub-id-type="medline">35007753</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(22)00001-6</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref67">
        <label>67</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kuhn</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Johnson</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <source>Applied Predictive Modeling</source>
          <year>2013</year>
          <publisher-loc>New York</publisher-loc>
          <publisher-name>Springer</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref68">
        <label>68</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Edwards</surname>
              <given-names>IR</given-names>
            </name>
            <name name-style="western">
              <surname>Aronson</surname>
              <given-names>JK</given-names>
            </name>
          </person-group>
          <article-title>Adverse drug reactions: definitions, diagnosis, and management</article-title>
          <source>Lancet</source>
          <year>2000</year>
          <month>10</month>
          <day>07</day>
          <volume>356</volume>
          <issue>9237</issue>
          <fpage>1255</fpage>
          <lpage>9</lpage>
          <pub-id pub-id-type="doi">10.1016/S0140-6736(00)02799-9</pub-id>
          <pub-id pub-id-type="medline">11072960</pub-id>
          <pub-id pub-id-type="pii">S0140-6736(00)02799-9</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
