<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMIR</journal-id>
      <journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id>
      <journal-title>Journal of Medical Internet Research</journal-title>
      <issn pub-type="epub">1438-8871</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v25i1e44870</article-id>
      <article-id pub-id-type="pmid">37133915</article-id>
      <article-id pub-id-type="doi">10.2196/44870</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Transferability Based on Drug Structure Similarity in the Automatic Classification of Noncompliant Drug Use on Social Media: Natural Language Processing Approach</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Eysenbach</surname>
            <given-names>Gunther</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Ru</surname>
            <given-names>Boshu</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Zhao</surname>
            <given-names>Peng</given-names>
          </name>
          <degrees>PhD</degrees>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author">
          <name name-style="western">
            <surname>Nishiyama</surname>
            <given-names>Tomohiro</given-names>
          </name>
          <degrees>MEng</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-1538-8266</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Yada</surname>
            <given-names>Shuntaro</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-6209-1054</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Wakamiya</surname>
            <given-names>Shoko</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-9371-1340</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Hori</surname>
            <given-names>Satoko</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-4596-5418</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Aramaki</surname>
            <given-names>Eiji</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Department of Information Science</institution>
            <institution>Nara Institute of Science and Technology</institution>
            <addr-line>8916-5, Takayama-cho</addr-line>
            <addr-line>Ikoma, 630-0192</addr-line>
            <country>Japan</country>
            <phone>81 743 72 5250</phone>
            <email>aramaki@is.naist.jp</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-0201-3609</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Department of Information Science</institution>
        <institution>Nara Institute of Science and Technology</institution>
        <addr-line>Ikoma</addr-line>
        <country>Japan</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Division of Drug Informatics</institution>
        <institution>Keio University Faculty of Pharmacy</institution>
        <addr-line>Tokyo</addr-line>
        <country>Japan</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Eiji Aramaki <email>aramaki@is.naist.jp</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <year>2023</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>3</day>
        <month>5</month>
        <year>2023</year>
      </pub-date>
      <volume>25</volume>
      <elocation-id>e44870</elocation-id>
      <history>
        <date date-type="received">
          <day>7</day>
          <month>12</month>
          <year>2022</year>
        </date>
        <date date-type="rev-request">
          <day>27</day>
          <month>12</month>
          <year>2022</year>
        </date>
        <date date-type="rev-recd">
          <day>17</day>
          <month>3</month>
          <year>2023</year>
        </date>
        <date date-type="accepted">
          <day>29</day>
          <month>3</month>
          <year>2023</year>
        </date>
      </history>
      <copyright-statement>©Tomohiro Nishiyama, Shuntaro Yada, Shoko Wakamiya, Satoko Hori, Eiji Aramaki. Originally published in the Journal of Medical Internet Research (https://www.jmir.org), 03.05.2023.</copyright-statement>
      <copyright-year>2023</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research, is properly cited. The complete bibliographic information, a link to the original publication on https://www.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://www.jmir.org/2023/1/e44870" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Medication noncompliance is a critical issue because of the increased number of drugs sold on the web. Web-based drug distribution is difficult to control, causing problems such as drug noncompliance and abuse. The existing medication compliance surveys lack completeness because it is impossible to cover patients who do not go to the hospital or provide accurate information to their doctors, so a social media–based approach is being explored to collect information about drug use. Social media data, which includes information on drug usage by users, can be used to detect drug abuse and medication compliance in patients.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>This study aimed to assess how the structural similarity of drugs affects the efficiency of machine learning models for text classification of drug noncompliance.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>This study analyzed 22,022 tweets about 20 different drugs. The tweets were labeled as either noncompliant use or mention, noncompliant sales, general use, or general mention. The study compares 2 methods for training machine learning models for text classification: single-sub-corpus transfer learning, in which a model is trained on tweets about a single drug and then tested on tweets about other drugs, and multi-sub-corpus incremental learning, in which models are trained on tweets about drugs in order of their structural similarity. The performance of a machine learning model trained on a single subcorpus (a data set of tweets about a specific category of drugs) was compared to the performance of a model trained on multiple subcorpora (data sets of tweets about multiple categories of drugs).</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>The results showed that the performance of the model trained on a single subcorpus varied depending on the specific drug used for training. The Tanimoto similarity (a measure of the structural similarity between compounds) was weakly correlated with the classification results. The model trained by transfer learning a corpus of drugs with close structural similarity performed better than the model trained by randomly adding a subcorpus when the number of subcorpora was small.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>The results suggest that structural similarity improves the classification performance of messages about unknown drugs if the drugs in the training corpus are few. On the other hand, this indicates that there is little need to consider the influence of the Tanimoto structural similarity if a sufficient variety of drugs are ensured.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>data mining</kwd>
        <kwd>machine learning</kwd>
        <kwd>medication noncompliance</kwd>
        <kwd>natural language processing</kwd>
        <kwd>pharmacovigilance</kwd>
        <kwd>transfer learning</kwd>
        <kwd>text classification</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>Medication compliance, a type of health literacy defined as a patient’s use of medications [<xref ref-type="bibr" rid="ref1">1</xref>], is a critical issue because an increased number of drugs have been sold on the web. Web-based drug distribution is difficult to control, causing problems such as drug noncompliance and abuse [<xref ref-type="bibr" rid="ref2">2</xref>]. Thus, the importance of medication compliance surveys, such as what kinds of medications tend to be abused, is increasing. However, medication compliance surveys are unreliable because it is impossible to cover patients who do not go to the hospital or provide accurate information to their doctors. This situation motivates a social media–based approach because some patients provide information about drug usage. Therefore, social media is attracting attention for collecting knowledge about drug use information [<xref ref-type="bibr" rid="ref3">3</xref>-<xref ref-type="bibr" rid="ref6">6</xref>].</p>
      <p>We attempted to use social media to catch medication compliance, people’s understanding of drugs, and other health-related information to understand the patients’ medication status and knowledge. This information should be useful as an early signal for the dissemination and understanding of regulations and safety information from drug regulatory authorities and drug suppliers. There are many potential ways to use social media; drug regulatory authorities and drug suppliers might detect specific drugs that are likely to be misused by automatically classifying comments. Some studies have linked other compliant use statistics to the number of medication noncompliance tweets, and real-time message collection might be expected to expedite drug regulation [<xref ref-type="bibr" rid="ref7">7</xref>]. Ru et al [<xref ref-type="bibr" rid="ref8">8</xref>] mentioned some patients reported serendipitous new indications for the drugs they were using for comorbidity, which is valuable information for drug repositioning on social media sites.</p>
      <p>In addition, social media is expected to be one of the methods to capture the voice of patients as a supplement to traditional questionnaire-based surveys. There are 2 methods of information extraction from social media: manual annotation and machine learning. As research examples of manual annotation, Sinnenberg et al [<xref ref-type="bibr" rid="ref9">9</xref>] and Golder et al [<xref ref-type="bibr" rid="ref6">6</xref>] applied it to tweets to categorize statements about certain kinds of drugs, such as drugs for cardiovascular disease or statins. Gkotsis et al [<xref ref-type="bibr" rid="ref10">10</xref>] applied it to Reddit posts to understand the characteristics of users diagnosed with dementia. Wexler et al [<xref ref-type="bibr" rid="ref11">11</xref>] and Beusterien et al [<xref ref-type="bibr" rid="ref12">12</xref>] used manual coding to study certain forums related to health. As examples of machine learning methods, Mao et al [<xref ref-type="bibr" rid="ref13">13</xref>] studied how users discussed the side effects of aromatase inhibitors and concerns about risk-benefit balance. Burkhardt et al [<xref ref-type="bibr" rid="ref14">14</xref>] used a semisupervised learning method to detect side effects reported in tweets. Rastegar-Mojarad et al [<xref ref-type="bibr" rid="ref15">15</xref>] and Zhao and Yang [<xref ref-type="bibr" rid="ref16">16</xref>] used machine learning approaches to detect potential candidates for drug repositioning. Weissenbacher et al [<xref ref-type="bibr" rid="ref17">17</xref>] created an ensemble learning classifier that can identify tweets mentioning drugs and dietary supplements. Sarker and Gonzalez [<xref ref-type="bibr" rid="ref18">18</xref>] created a corpus to identify drugs on Twitter, with potential applications for monitoring drug efficacy, side effects, and user sentiment toward drugs.</p>
      <p>Moreover, some attempts have been made to detect drug abuse and medication compliance in patients [<xref ref-type="bibr" rid="ref19">19</xref>-<xref ref-type="bibr" rid="ref26">26</xref>]. Abdellaoui et al [<xref ref-type="bibr" rid="ref24">24</xref>] performed tweet classification using a topic model for 2 drugs: <italic>escitalopram</italic> and <italic>aripiprazole</italic>. Weissenbacher et al [<xref ref-type="bibr" rid="ref19">19</xref>] proposed a method for detecting drug dosage changes in noncompliant patients. Bigeard et al [<xref ref-type="bibr" rid="ref26">26</xref>] attempted to detect drug misuse and found that using Anatomical Therapeutic Chemical (ATC) codes and text in the classification task improved the accuracy of misuse detection. However, the existing methods do not fully use information on drugs, such as the structure of the active ingredients.</p>
      <p>In our approach, the method of developing a corpus is a major practical issue because the corpus depends heavily on the drug type. This means that it is difficult to cover all drug types because the nature of the text varies widely from drug to drug. As shown in <xref rid="figure1" ref-type="fig">Figure 1</xref>, medication noncompliance tweets about drugs classified as sleeping pills and anxiolytics, such as <italic>Lexotan</italic>, stand out for mentions of overdose (<xref rid="figure1" ref-type="fig">Figure 1</xref>, left and middle). On the other hand, diuretics such as <italic>Lasix</italic> stand out in tweets suggesting that they are used for dieting (<xref rid="figure1" ref-type="fig">Figure 1</xref>, right). Thus, the messages differ for each drug type. This makes classification more difficult and results in lower accuracy. In such cases, supervised learning is optimal for classifying tweets about various drugs with high accuracy [<xref ref-type="bibr" rid="ref19">19</xref>,<xref ref-type="bibr" rid="ref27">27</xref>-<xref ref-type="bibr" rid="ref29">29</xref>], and a corpus for each drug is necessary. However, building such a corpus is time-consuming and costly.</p>
      <fig id="figure1" position="float">
        <label>Figure 1</label>
        <caption>
          <p>Our approach, transfer learning based on chemical structures, assumes similarly structured corpora are transferable.</p>
        </caption>
        <graphic xlink:href="jmir_v25i1e44870_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
      </fig>
      <p>To make use of the limited data, we attempted transfer learning to reuse the data for training, in which a corpus created for a specific drug is used for other medications. We used drug structural similarity to guide training. Drugs with similar chemical structures are likely to have similar mechanisms of action and can be used for similar purposes. Specifically, Martin et al [<xref ref-type="bibr" rid="ref30">30</xref>] demonstrated that structurally similar drugs have similar mechanisms of action. Meyer et al [<xref ref-type="bibr" rid="ref31">31</xref>] used the structural information of a drug to predict its usage. Therefore, it is conceivable that tweets about medication noncompliance that mention structurally similar drugs are also similar [<xref ref-type="bibr" rid="ref19">19</xref>,<xref ref-type="bibr" rid="ref27">27</xref>-<xref ref-type="bibr" rid="ref29">29</xref>]. For example, tweets about the drug <italic>Flunitrazepam</italic>, which has a chemical structure similar to that of <italic>Lexotan</italic>, are likely to serve effectively as training data (<xref rid="figure1" ref-type="fig">Figure 1</xref>).</p>
      <p>Therefore, we performed transfer learning of a corpus for drugs with similar chemical structures. To conduct transfer learning, we prepared a MediA corpus data set to monitor medication noncompliance. In this corpus, we defined noncompliance as a message that indicates the speaker’s incorrect perception of handling a drug. Specifically, messages showing noncompliance were labeled as “<italic>Noncompliant use or mention</italic> (<italic>NC-u/m</italic>),” among which messages about buying and selling were marked as “<italic>Noncompliant sales</italic> (<italic>NC-s</italic>),” and messages about medication that was not noncompliant were labeled as “<italic>General use</italic> (<italic>G-u</italic>).” All other messages were labeled as “<italic>General mention</italic> (<italic>G-m</italic>).”</p>
      <p>The contributions of this study are as follows:</p>
      <list list-type="order">
        <list-item>
          <p>We constructed a corpus labeled for medication noncompliance.</p>
        </list-item>
        <list-item>
          <p>We propose a transfer learning method that uses chemical structures. Language processing can use these features, but this has not yet been addressed in the existing research.</p>
        </list-item>
      </list>
      <p>In this study, we performed transfer learning to classify tweets about different drugs using a model trained on tweets about specific drugs in our corpus and discussed the results in terms of drug characteristics. In addition, we focused on the chemical structure of the drugs and verified their learning efficiency using the similarity of chemical structures. These results suggest that learning efficiency improves with limited drug data.</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Materials</title>
        <p>The corpus consisted of 22,022 tweets referring to 20 drugs labeled according to noncompliance. The 20 drugs included <italic>Loxonin</italic> (<italic>Loxoprofen</italic>) and <italic>Voltaren</italic> (<italic>Diclofenac</italic>) for pain relief; <italic>Myslee</italic> (<italic>Zolpidem</italic>), <italic>Flunitrazepam</italic>, <italic>Lexotan</italic> (<italic>Bromazepam</italic>), <italic>Lunesta</italic> (<italic>Eszopiclone</italic>), <italic>Depas</italic> (<italic>Etizolam</italic>), and <italic>Belsomra</italic> (<italic>Suvorexant</italic>) for sleep and antianxiety; <italic>Paxil</italic> (<italic>Paroxetine</italic>), <italic>Lexapro</italic> (<italic>Escitalopram</italic>), <italic>Sertraline</italic>, <italic>Abilify</italic> (<italic>Aripiprazole</italic>), <italic>Contomin</italic> (<italic>Chlorpromazine</italic>), <italic>Zyprexa</italic> (<italic>Olanzapine</italic>), and <italic>Risperdal</italic> (<italic>Risperidone</italic>) for antipsychotic drugs; <italic>Restamin</italic> (<italic>Diphenhydramine</italic>) for antiallergic drugs; <italic>Medicon</italic> (<italic>Dextromethorphan</italic>) for a cough suppressant; <italic>Zithromax</italic> (<italic>Azithromycin</italic>) for an antibiotic; <italic>Metformin</italic> for diabetes treatment; and <italic>Lasix</italic> (<italic>Furosemide</italic>) for a diuretic. <italic>Flunitrazepam, Sertraline,</italic> and <italic>Metformin</italic> are generic names.
The words used as drug queries were “<italic>Loxonin</italic>,” “<italic>Voltaren</italic>,” “<italic>Myslee</italic>,” “<italic>Flunitrazepam</italic>,” “<italic>Lexotan</italic>,” “<italic>Lunesta</italic>,” “<italic>Depas</italic>,” “<italic>Belsomra</italic>,” “<italic>Paxil</italic>,” “<italic>Lexapro</italic>,” “<italic>Sertraline</italic>,” “<italic>Abilify</italic>,” “<italic>Contomin</italic>,” “<italic>Zyprexa</italic>,” “<italic>Risperdal</italic>,” “<italic>Restamin</italic>,” “<italic>Medicon</italic>,” “<italic>Zithromax</italic>,” “<italic>Metformin</italic>,” and “<italic>Lasix,</italic>” respectively. The 20 drugs were selected based on the following criteria: (1) they are commonly prescribed or used as over-the-counter drugs, and the query is a brand name or generic name, and (2) the number of tweets in the past 3 years must be more than 1000 to ensure sufficient volume. We manually selected the 20 drug queries with fewer advertisements and promotional messages. Tweets were collected using the 20 drug queries from January 1, 2017, to December 31, 2020, followed by random sampling of 1000 tweets for each drug.</p>
        <p>In this corpus, noncompliance was defined as a tweet that could be read as the writer’s incorrect perception of handling a drug, and tweets were categorized into 4 types: <italic>noncompliant use or mention, noncompliant sales, general use</italic>, and <italic>general mention,</italic> as shown in <xref ref-type="boxed-text" rid="box1">Textbox 1</xref>. Specifically, tweets that could be read as noncompliant were marked as “<italic>Noncompliant use or mention</italic> (<italic>NC-u/m</italic>),” tweets related to buying and selling were labeled as “<italic>Noncompliant sales</italic> (<italic>NC-s</italic>),” tweets related to medication that were not noncompliant were labeled as “<italic>General use</italic> (<italic>G-u</italic>),” and all other tweets were marked as “<italic>General mention</italic> (<italic>G-m</italic>).” A statement was defined as noncompliance even if the noncompliance was not definitive, including statements that may be exaggerations. For instance, we judged the first example as noncompliance because it is suspected that the user took more of the drug than needed. We adopted the criterion of whether a statement could be suspected of noncompliance in order to capture small signals of noncompliance. <xref ref-type="boxed-text" rid="box1">Textbox 1</xref> presents some examples from the MediA corpus. The detailed examples and guidelines of the corpus are shown in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p>
        <p>As for the annotation results, of 22,022 cases, 4630 were “<italic>NC-u/m</italic>,” 1577 were “<italic>NC-s</italic>,” 8326 were “<italic>G-u</italic>,” and 7489 were “<italic>G-m</italic>.” The Cohen kappa coefficient was 0.695, indicating a substantial agreement [<xref ref-type="bibr" rid="ref32">32</xref>]. Annotation was performed by 3 persons, 1 with pharmacological knowledge and 2 with sufficient experience in annotating biomedical documents.</p>
        <boxed-text id="box1" position="float">
          <title>Examples of MediA corpus.</title>
          <p>
            <bold>Noncompliant use or mention (NC-u/m)</bold>
          </p>
          <list list-type="bullet">
            <list-item>
              <p>デパス多めに飲んだ (I took more <italic>Depas</italic>)</p>
            </list-item>
            <list-item>
              <p>デパスの処方やめるって言われたら生きていかれないと思う (I don’t think I could live with myself if they told me to stop prescribing <italic>Depas</italic>)</p>
            </list-item>
            <list-item>
              <p>眠剤とデパスに依存症になって，カー！！デパス効いてきてふわふわ気持ちいい (I’ve become addicted to sleeping pills and <italic>Depas</italic>, I feel lightheaded and comfortable as <italic>Depas</italic> is working)</p>
            </list-item>
          </list>
          <p>
            <bold>Noncompliant sales (NC-s)</bold>
          </p>
          <list list-type="bullet">
            <list-item>
              <p>レクサプロ⋅ジェネリック抗うつ剤のレクサプロジェネリック医療品でうつ病や、パニック障害、対人恐怖症、不安障害に有効的です 20 mg × 200錠 ¥14,000 (US $104) (<italic>Lexapro</italic> Generic: Antidepressant <italic>Lexapro</italic> generic medical product effective for depression, panic disorder, interpersonal phobia, anxiety disorder 20 mg × 200 tablets ¥14,000 [US $104])</p>
            </list-item>
          </list>
          <p>
            <bold>General use (G-u)</bold>
          </p>
          <list list-type="bullet">
            <list-item>
              <p>リスパダール飲んだ<inline-graphic xlink:href="jmir_v25i1e44870_fig9.png" xlink:type="simple" mimetype="image"/>ぞ。(I took <italic>Risperdal</italic>)</p>
            </list-item>
          </list>
          <p>
            <bold>General mention (G-m)</bold>
          </p>
          <list list-type="bullet">
            <list-item>
              <p>ラシックスなしで着順上げながら三冠完走って只者じゃなかったね (He was not a simpleton to finish the Triple Crown without <italic>Lasix</italic> while improving his finishing order)</p>
            </list-item>
          </list>
        </boxed-text>
      </sec>
      <sec>
        <title>Experiment Design</title>
        <p>We conducted an experiment to compare the learning efficiency of text classification for drug noncompliance. The objective of the study was to clarify how the structural similarity of drugs affects the learning models for the text classification of drugs. The motivation for this experiment was as follows: Each active ingredient in a drug has a unique structure. We hypothesized that texts whose drugs had similar chemical structures would be similar. Therefore, we expected that the similarity of the chemical structures of the drugs would help train a model for text classification.</p>
        <p>There are 2 methods, single-subcorpus transfer learning and multi-subcorpus incremental learning, which we designed in this study. In the single subcorpus transfer learning, we classified tweets mentioning other drug queries using a model trained on every single drug. We compared the structural similarity and model classification performance to investigate the relationship between the similarity and classification metrics. In multi-subcorpus incremental learning, we checked the classification performance of models trained by tweets mentioning the drug query selected in order of similarity. We demonstrate the usefulness of similarity by comparing it with a randomly trained model.</p>
        <p>This learning method is motivated by the following use case: When pharmaceutical companies and authorities use social media to capture potential signals of medication noncompliance for each drug, they use trained models. To evaluate medication noncompliance in a low-resource language, it is necessary to begin with the creation of a corpus. However, the size of the corpus and the drugs selected should be limited because corpus creation is costly. A corpus for a certain drug is particularly valuable if it can be used for texts about other drugs by transfer learning.</p>
      </sec>
      <sec>
        <title>Classifier</title>
        <p>Experiments were conducted using bidirectional encoder representations from transformers (BERT)–based classifiers. A BERT model pretrained on Japanese Wikipedia (we adopted the pretrained BERT model “bert-base-Japanese-whole-word-masking” downloaded from Hugging Face Hub [<xref ref-type="bibr" rid="ref33">33</xref>]) was fine-tuned using the MediA corpus. The model consisted of 12 layers, 768-dimensional hidden layers, and 12 attention heads. We used the CLS token of the last layer to classify texts. A classification task was performed to evaluate the usefulness of this corpus.</p>
        <p>We used BERT as a text classification model because BERT has achieved better results than lightweight models such as Word2vec embedding+LSTM and N-gram+traditional models. Specifically, Al-Garadi et al [<xref ref-type="bibr" rid="ref34">34</xref>] compared BERT with a model that used Twitter GloVe embeddings + BiLSTM in tweet classification of drug use and showed that BERT performed better than the BiLSTM-based model. Tassone et al [<xref ref-type="bibr" rid="ref35">35</xref>] also compared BERT and XGBoost for tweet classification and showed that BERT obtained better results.</p>
        <sec>
          <title>Initial Settings</title>
          <p>The labeled data set of the MediA corpus was divided into 3 parts in an 80:10:10 ratio; the larger set was used for training and the 2 smaller sets for development and testing. For all the models trained in this study, the training was stopped at the point where the validation loss was the smallest.</p>
        </sec>
        <sec>
          <title>Single Subcorpus Transfer Learning</title>
          <p>Given a pair of drug queries <italic>i</italic> and <italic>j</italic>, the labels of tweets <italic>D<sub>j</sub></italic> that mention drug query <italic>j</italic> were predicted using model <italic>M<sub>i</sub></italic> built with tweets <italic>D<sub>i</sub></italic> that mention drug query <italic>i</italic>. In the case of <italic>i</italic> ≠ <italic>j</italic>, the data set <italic>D<sub>i</sub></italic> was partitioned in a 90:10 ratio; the larger set was used for training and the smaller set for development, and <italic>D<sub>j</sub></italic> was the test set. In the case of <italic>i</italic> = <italic>j</italic>, the data set <italic>D<sub>i</sub></italic> was divided into 3 parts in an 80:10:10 ratio, and the larger set was used for training and the 2 smaller sets for development and testing. Because the data set was small, and data bias was considerable, random oversampling was performed to ensure an equal proportion of the 4 labels.</p>
        </sec>
        <sec>
          <title>Multi-subcorpus Incremental Learning</title>
          <p>We predicted the labels of tweets <italic>D<sub>j</sub></italic> mentioning drug query <italic>j</italic> using the model <italic>M<sub>K</sub></italic> built with tweets <italic>D<sub>K</sub></italic> mentioning the drug queries in <italic>K</italic> = {<italic>k<sub>i</sub></italic>}. <italic>K</italic> was a subset of the drug queries shown in the Methods section, containing 1 to 19 drugs, excluding drug query <italic>j</italic>. We divided the data set <italic>D<sub>K</sub></italic> in a 90:10 ratio and used the larger set for training and the smaller set for development, with <italic>D<sub>j</sub></italic> as the test set. We obtained the accuracy for the 20 drugs from this experiment and calculated the mean of the values. When adding the training data, we compared models trained using data chosen at random with models trained using data selected from those with similar structures. We defined <italic>simX</italic> as the result of a model trained with <italic>X</italic> drugs of similar structure and <italic>rndX</italic> as the result of a model trained with <italic>X</italic> drugs selected randomly.</p>
        </sec>
      </sec>
      <sec>
        <title>Drug Structure Similarity</title>
        <p>To quantitatively calculate drug structure similarity, we used the Tanimoto similarity, which indicates the degree of similarity of chemical structures [<xref ref-type="bibr" rid="ref36">36</xref>]. It is calculated by dividing the size of the intersection (product set) of the fingerprints of compounds A and B by the size of their union (sum set), that is, |A ∩ B| / |A ∪ B|. In other words, it is the proportion of substructure bits that are common to the 2 compounds.</p>
        <p>To calculate the Tanimoto similarity, the chemical formula of each drug was converted into a simplified molecular input line entry system (SMILES) [<xref ref-type="bibr" rid="ref37">37</xref>] to obtain the Morgan fingerprint vector. The radius size and the number of bits were set to 2 and 1024 bits, respectively.</p>
      </sec>
      <sec>
        <title>Ethical Considerations</title>
        <p>This study did not require participants to be involved in any physical or mental intervention. As this research did not use personally identifiable information, it was exempt from institutional review board approval in accordance with the Ethical Guidelines for Medical and Health Research Involving Human Subjects stipulated by the Japanese national government.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Single Subcorpus Transfer Learning</title>
        <p>The results of the validation using transfer learning are shown in <xref rid="figure2" ref-type="fig">Figure 2</xref>. The vertical and horizontal axes of the heatmap represent drug queries for the training and test data, respectively. The color intensity corresponds to the macro F1 values. The overall trend is that the values in the diagonal lines are the highest, indicating that learning using the corresponding query is the most efficient. However, <italic>Myslee</italic>, <italic>Flunitrazepam</italic>, <italic>Lexotan</italic>, <italic>Depas</italic>, <italic>Belsomra</italic>, <italic>Paxil</italic>, <italic>Lexapro</italic>, <italic>Sertraline</italic>, <italic>Abilify</italic>, <italic>Contomin</italic>, and <italic>Risperdal</italic> had darker areas that corresponded to the same drugs as well as the specific type of drugs. These drugs are classified into sleeping pills, anxiolytics, and antipsychotics. The darker colors of the areas suggest that tweets including these drug queries are available to each other for transfer learning, indicating a high possibility of transfer learning for drugs in similar categories.</p>
        <p>The Tanimoto similarity between the drugs is shown in <xref rid="figure3" ref-type="fig">Figure 3</xref>. This value is a numerical measure of the structural similarity of compounds, with a similarity of 1.0 for the same drug. Drugs used for similar purposes such as <italic>Loxonin</italic> and <italic>Voltaren</italic> are often structurally similar.</p>
        <p>The relationship between the Tanimoto similarity and F1 values for each drug is shown in <xref rid="figure4" ref-type="fig">Figure 4</xref>. The vertical and horizontal axes were standardized with a mean of 0 and a variance of 1. The correlation between the Tanimoto similarity and the F1 value was 0.278 (P&#60;.05). This result indicates that structural similarity is weakly correlated with the classification results.</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Value of F1 score for transfer learning.</p>
          </caption>
          <graphic xlink:href="jmir_v25i1e44870_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>The Tanimoto similarity between each drug.</p>
          </caption>
          <graphic xlink:href="jmir_v25i1e44870_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure4" position="float">
          <label>Figure 4</label>
          <caption>
            <p>Relationship between the Tanimoto similarity and F1 value for each drug.</p>
          </caption>
          <graphic xlink:href="jmir_v25i1e44870_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Multi-subcorpus Incremental Learning</title>
        <p>The results of tweet classification by BERT and validation by transfer learning are presented in <xref ref-type="table" rid="table1">Table 1</xref>. The left panel of <xref ref-type="table" rid="table1">Table 1</xref> (initial setting) shows the case where all data were used for training, whereas the right panel of <xref ref-type="table" rid="table1">Table 1</xref> (transfer learning) shows the training results without data from the target drug query. <italic>SimX</italic> results from a model trained with X drugs of similar structures.</p>
        <p><xref ref-type="table" rid="table1">Table 1</xref> shows that the <italic>Rnd3</italic> and <italic>Sim3</italic> results using 3 queries varied for each drug; however, <italic>Sim3</italic>, which was trained from drugs with high structural similarity, showed better overall values. Looking at each accuracy, the value of <italic>Sim3</italic> is at least 3 points higher than that of <italic>Rnd3</italic> for more drugs than the reverse, and the average value is higher. On the other hand, some values are higher for <italic>Rnd1</italic> than for <italic>Sim1</italic>, even though the training drug was selected randomly. This is due to the following factors. First, some drugs with different mechanisms have high structural similarity, such as <italic>Voltaren</italic> and <italic>Lasix</italic>, which have the highest structural similarity in this corpus. <italic>Voltaren</italic> is used as an antipyretic analgesic and <italic>Lasix</italic> is a prescription drug used as a diuretic. Thus, the textual properties are very different. The results of <italic>Voltaren</italic>, 0.618 for <italic>Rnd1</italic> and 0.454 for <italic>Sim1</italic>, show that the method of using a high-similarity drug for training does not work well. Second, even when drugs have other mechanisms and high structural similarity, selecting multiple drugs increases the likelihood that those with similar action mechanisms will be chosen. For example, drugs with high structural similarity to <italic>Voltaren</italic> include <italic>Lasix</italic>, <italic>Sertraline</italic>, and <italic>Loxonin</italic>. <italic>Loxonin</italic> is the same antipyretic analgesic, and adding <italic>Loxonin</italic> significantly improves the results (<italic>Lasix</italic>: 0.454; <italic>Lasix</italic> + <italic>Sertraline</italic>: 0.418; <italic>Lasix</italic> + <italic>Sertraline</italic> + <italic>Loxonin</italic>: 0.634). Thus, selecting multiple drugs with high structural similarity implies that it is more likely that drugs with similar usage can be selected as training data rather than selecting a single drug.</p>
        <p><xref rid="figure5" ref-type="fig">Figure 5</xref> shows a comparison of the accuracies of the 2 models. For <italic>Sim</italic>, the classification model using these similarities is trained by transfer with a data set created from drugs with close similarities. On the other hand, for <italic>Rnd</italic>, the model is trained by transferring a data set created from drugs selected at random. <italic>Sim</italic> showed better results than <italic>Rnd</italic> in the middle of the learning process; however, when approximately 10 drugs were added to the training data, there was no significant difference between the results of random selection and those of similarity-based selection.</p>
        <p><xref rid="figure6" ref-type="fig">Figure 6</xref> shows the plot of each drug pair for each drug name. All plots are categorized into 3 major types: <italic>OTC-rel type</italic> contains an over-the-counter (OTC) drug in one of the pairs; <italic>antipsycho type</italic> is a combination of antipsychotic medications such as sleeping pills, anxiolytics, and antischizophrenics; and <italic>other type</italic> is any other combination.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Comparison of initial setting and transfer learning.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="120"/>
            <col width="100"/>
            <col width="80"/>
            <col width="60"/>
            <col width="80"/>
            <col width="90"/>
            <col width="0"/>
            <col width="70"/>
            <col width="80"/>
            <col width="80"/>
            <col width="80"/>
            <col width="80"/>
            <col width="80"/>
            <thead>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="6">Initial setting, F1 score</td>
                <td colspan="6">Transfer learning, accuracy</td>
              </tr>
              <tr valign="bottom">
                <td>
                  <break/>
                </td>
                <td>NC-u/m<sup>a</sup></td>
                <td>NC-s<sup>b</sup></td>
                <td>G-u<sup>c</sup></td>
                <td>G-m<sup>d</sup></td>
                <td>Average</td>
                <td colspan="2">Rnd1</td>
                <td>Sim1</td>
                <td>Rnd3</td>
                <td>Sim3</td>
                <td>Rnd10</td>
                <td>Sim10</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Loxonin</td>
                <td>41.7</td>
                <td>0.0</td>
                <td>76.1</td>
                <td>45.3</td>
                <td>65.6</td>
                <td colspan="2">64.8</td>
                <td>63.6</td>
                <td>60.6</td>
                <td>
                  <italic>65.8</italic>
                  <sup>e</sup>
                </td>
                <td>63.3</td>
                <td>64.9</td>
              </tr>
              <tr valign="top">
                <td>Voltaren</td>
                <td>50.0</td>
                <td>33.3</td>
                <td>77.3</td>
                <td>65.7</td>
                <td>71.0</td>
                <td colspan="2">
                  <italic>61.8</italic>
                </td>
                <td>45.4</td>
                <td>48.8</td>
                <td>
                  <italic>63.4</italic>
                </td>
                <td>66.0</td>
                <td>66.1</td>
              </tr>
              <tr valign="top">
                <td>Myslee</td>
                <td>52.6</td>
                <td>70.6</td>
                <td>54.3</td>
                <td>69.6</td>
                <td>58.9</td>
                <td colspan="2">44.4</td>
                <td>45.1</td>
                <td>52.6</td>
                <td>53.2</td>
                <td>61.0</td>
                <td>60.5</td>
              </tr>
              <tr valign="top">
                <td>Flunitrazepam</td>
                <td>44.4</td>
                <td>96.0</td>
                <td>69.2</td>
                <td>84.5</td>
                <td>73.3</td>
                <td colspan="2">53.5</td>
                <td>
                  <italic>57.4</italic>
                </td>
                <td>65.3</td>
                <td>66.5</td>
                <td>69.0</td>
                <td>68.8</td>
              </tr>
              <tr valign="top">
                <td>Lexotan</td>
                <td>66.7</td>
                <td>100.0</td>
                <td>77.7</td>
                <td>60.6</td>
                <td>74.1</td>
                <td colspan="2">42.5</td>
                <td>
                  <italic>54.1</italic>
                </td>
                <td>61.9</td>
                <td>61.3</td>
                <td>68.2</td>
                <td>69.1</td>
              </tr>
              <tr valign="top">
                <td>Lunesta</td>
                <td>53.3</td>
                <td>75.0</td>
                <td>67.4</td>
                <td>58.5</td>
                <td>63.1</td>
                <td colspan="2">54.4</td>
                <td>52.8</td>
                <td>58.6</td>
                <td>57.8</td>
                <td>66.2</td>
                <td>68.2</td>
              </tr>
              <tr valign="top">
                <td>Depas</td>
                <td>64.0</td>
                <td>93.3</td>
                <td>75.2</td>
                <td>71.7</td>
                <td>74.4</td>
                <td colspan="2">45.8</td>
                <td>
                  <italic>51.4</italic>
                </td>
                <td>50.0</td>
                <td>
                  <italic>57.1</italic>
                </td>
                <td>59.9</td>
                <td>59.4</td>
              </tr>
              <tr valign="top">
                <td>Belsomra</td>
                <td>54.5</td>
                <td>0.0</td>
                <td>73.4</td>
                <td>74.6</td>
                <td>70.5</td>
                <td colspan="2">
                  <italic>55.4</italic>
                </td>
                <td>51.2</td>
                <td>61.1</td>
                <td>
                  <italic>64.8</italic>
                </td>
                <td>64.1</td>
                <td>66.3</td>
              </tr>
              <tr valign="top">
                <td>Paxil</td>
                <td>54.5</td>
                <td>75.0</td>
                <td>78.1</td>
                <td>87.5</td>
                <td>80.6</td>
                <td colspan="2">51.6</td>
                <td>49.7</td>
                <td>60.8</td>
                <td>60.1</td>
                <td>65.6</td>
                <td>68.2</td>
              </tr>
              <tr valign="top">
                <td>Lexapro</td>
                <td>48.6</td>
                <td>95.7</td>
                <td>81.8</td>
                <td>75.0</td>
                <td>77.1</td>
                <td colspan="2">29.1</td>
                <td>
                  <italic>58.8</italic>
                </td>
                <td>58.0</td>
                <td>56.5</td>
                <td>66.6</td>
                <td>65.4</td>
              </tr>
              <tr valign="top">
                <td>Sertraline</td>
                <td>50.0</td>
                <td>40.0</td>
                <td>78.6</td>
                <td>65.0</td>
                <td>70.1</td>
                <td colspan="2">58.6</td>
                <td>55.3</td>
                <td>
                  <italic>64.6</italic>
                </td>
                <td>58.5</td>
                <td>65.7</td>
                <td>67.9</td>
              </tr>
              <tr valign="top">
                <td>Abilify</td>
                <td>46.7</td>
                <td>100.0</td>
                <td>78.3</td>
                <td>64.9</td>
                <td>71.3</td>
                <td colspan="2">56.9</td>
                <td>
                  <italic>63.3</italic>
                </td>
                <td>62.5</td>
                <td>
                  <italic>69.0</italic>
                </td>
                <td>74.7</td>
                <td>72.3</td>
              </tr>
              <tr valign="top">
                <td>Contomin</td>
                <td>56.2</td>
                <td>83.3</td>
                <td>78.8</td>
                <td>76.9</td>
                <td>74.7</td>
                <td colspan="2">
                  <italic>51.0</italic>
                </td>
                <td>30.5</td>
                <td>64.0</td>
                <td>61.9</td>
                <td>66.4</td>
                <td>67.2</td>
              </tr>
              <tr valign="top">
                <td>Zyprexa</td>
                <td>16.7</td>
                <td>0.0</td>
                <td>67.3</td>
                <td>71.4</td>
                <td>63.0</td>
                <td colspan="2">53.9</td>
                <td>
                  <italic>63.2</italic>
                </td>
                <td>53.7</td>
                <td>
                  <italic>63.7</italic>
                </td>
                <td>66.7</td>
                <td>67.3</td>
              </tr>
              <tr valign="top">
                <td>Risperdal</td>
                <td>48.0</td>
                <td>80.0</td>
                <td>60.0</td>
                <td>71.0</td>
                <td>63.0</td>
                <td colspan="2">56.2</td>
                <td>57.5</td>
                <td>62.0</td>
                <td>59.3</td>
                <td>70.4</td>
                <td>69.6</td>
              </tr>
              <tr valign="top">
                <td>Restamine</td>
                <td>72.2</td>
                <td>0.0</td>
                <td>65.1</td>
                <td>75.5</td>
                <td>71.1</td>
                <td colspan="2">
                  <italic>40.8</italic>
                </td>
                <td>33.5</td>
                <td>36.2</td>
                <td>
                  <italic>49.4</italic>
                </td>
                <td>51.7</td>
                <td>51.6</td>
              </tr>
              <tr valign="top">
                <td>Medicon</td>
                <td>76.2</td>
                <td>58.8</td>
                <td>64.3</td>
                <td>67.7</td>
                <td>70.0</td>
                <td colspan="2">36.6</td>
                <td>34.5</td>
                <td>
                  <italic>49.6</italic>
                </td>
                <td>45.7</td>
                <td>
                  <italic>52.0</italic>
                </td>
                <td>49.0</td>
              </tr>
              <tr valign="top">
                <td>Zithromax</td>
                <td>0.0</td>
                <td>95.7</td>
                <td>71.4</td>
                <td>90.5</td>
                <td>87.0</td>
                <td colspan="2">
                  <italic>62.2</italic>
                </td>
                <td>48.8</td>
                <td>55.5</td>
                <td>55.5</td>
                <td>
                  <italic>75.7</italic>
                </td>
                <td>66.3</td>
              </tr>
              <tr valign="top">
                <td>Metformin</td>
                <td>47.1</td>
                <td>88.2</td>
                <td>66.7</td>
                <td>93.7</td>
                <td>87.3</td>
                <td colspan="2">60.0</td>
                <td>71.4</td>
                <td>69.2</td>
                <td>
                  <italic>74.8</italic>
                </td>
                <td>74.4</td>
                <td>78.8</td>
              </tr>
              <tr valign="top">
                <td>Lasix</td>
                <td>75.3</td>
                <td>93.7</td>
                <td>48.5</td>
                <td>84.2</td>
                <td>79.5</td>
                <td colspan="2">
                  <italic>47.7</italic>
                </td>
                <td>26.2</td>
                <td>
                  <italic>48.4</italic>
                </td>
                <td>40.6</td>
                <td>
                  <italic>54.3</italic>
                </td>
                <td>47.5</td>
              </tr>
              <tr valign="top">
                <td>Average</td>
                <td>59.7</td>
                <td>86.4</td>
                <td>73.1</td>
                <td>76.6</td>
                <td>72.3</td>
                <td colspan="2">51.4</td>
                <td>50.7</td>
                <td>57.1</td>
                <td>
                  <italic>59.2</italic>
                </td>
                <td>64.7</td>
                <td>65.1</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>NC-u/m: noncompliant use or mention.</p>
            </fn>
            <fn id="table1fn2">
              <p><sup>b</sup>NC-s: noncompliant sales.</p>
            </fn>
            <fn id="table1fn3">
              <p><sup>c</sup>G-u: general use.</p>
            </fn>
            <fn id="table1fn4">
              <p><sup>d</sup>G-m: general mention.</p>
            </fn>
            <fn id="table1fn5">
              <p><sup>e</sup>Values individually at least 3 points higher than the corresponding value and averages at least 2 points higher than the corresponding value are in italics.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <fig id="figure5" position="float">
          <label>Figure 5</label>
          <caption>
            <p>Comparison of the accuracy of the 2 models. <italic>Sim</italic> is a model transfer learned from a data set of drugs with close similarity; <italic>Rnd</italic> is a model transfer learned from a data set of randomly selected drugs.</p>
          </caption>
          <graphic xlink:href="jmir_v25i1e44870_fig5.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure6" position="float">
          <label>Figure 6</label>
          <caption>
            <p>The Tanimoto similarity and F1 score pairs for each drug. <italic>OTC-rel type</italic> contains an over-the-counter (OTC) drug in one of the pairs; <italic>antipsycho type</italic> is a combination of antipsychotic medications such as sleeping pills, anxiolytics, and antischizophrenics; and <italic>other type</italic> is any other combination.</p>
          </caption>
          <graphic xlink:href="jmir_v25i1e44870_fig6.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Results</title>
        <p>This study observed that the learning efficiency in transfer learning is better for drugs with similar structures in a small corpus. Creating a large drug corpus is costly because it requires expertise, and the corpus must be renewed because new drugs are often introduced. Therefore, the efficient usage of a small corpus is essential. A small drug corpus conveys information about the drugs themselves, such as their names and the structures of their active ingredients. Based on our findings, a drug-based metric, such as structural similarity, will contribute to model training, especially when resources such as corpora and budget are limited, such as in low-resource languages.</p>
        <p>In <xref rid="figure6" ref-type="fig">Figure 6</xref>, <italic>OTC-rel type</italic> includes <italic>Voltaren</italic>, <italic>Loxonin</italic>, and <italic>Restamine</italic> in one of the pairs, and the F1 score tends to be lower overall. We hypothesize that the reason for the low F1 score is that pairs containing these drugs are less likely to have personal remote drug transactions classified as <italic>NC-s</italic>, and the tendency of their messages is different from that of prescribed drugs. In fact, OTC drug messages are more about individual transfers than remote drug transactions. <xref rid="figure6" ref-type="fig">Figure 6</xref> shows the macro average of the F1 scores, but the scores of NC-s might lower the overall results. Additionally, the similarity tended to be relatively low. This is possibly because the analgesic drugs <italic>Voltaren</italic> and <italic>Loxonin</italic> and the antiallergic drug <italic>Restamine</italic> tend to have different structures than benzodiazepines and tricyclic antidepressants, which are the primary drugs selected in this study. Under the current experimental conditions, it is challenging to use transfer learning across prescription and OTC drugs.</p>
        <p><italic>Antipsycho type</italic> tends to have high F1 scores and similarity, possibly due to the similar textual properties and structures of antipsychotic drugs. The combination of these benzodiazepine sleep medications is the most common type of antipsychotic. Among antipsychotics, benzodiazepine sleep medications are most likely to be textually similar.</p>
        <p><xref rid="figure7" ref-type="fig">Figure 7</xref> compares the results of single-corpus transfer learning for drugs with similar structure and drugs with similar indications. In this figure, we visualize the results as pairs of sleeping pills as drugs with the same indication, pairs of sleeping pills and antipsychotics as drugs with similar indications, and pairs of sleeping pills and others as drugs with no similar indications. We also defined pairs of structural similarity as having a structural similarity greater than 0.15 and pairs without a structural similarity as having a structural similarity smaller than 0.03. As can be seen from this figure, the results of transfer learning are comparable for drug pairs with similar indications and drug pairs with high structural similarity. It also clearly shows the inefficiency of transfer learning for drugs with low structural similarity. These results indicate the usefulness of transfer learning by using structural similarity.</p>
        <p>In our study, it is assumed that drugs with similar chemical structures can be used for similar purposes. This is based on the result demonstrated by Martin et al [<xref ref-type="bibr" rid="ref30">30</xref>] that structurally similar drugs have similar mechanisms of action. The usage of drugs can also be considered similar. The similarity in usage means that the noncompliance of the drugs is similar and the texts are also similar. Through our study, we believe that we have shown that the structural similarity of drugs is useful for transfer learning of these textual classifications.</p>
        <p>In addition, Jo et al [<xref ref-type="bibr" rid="ref38">38</xref>] used deep learning to predict usage from SMILES transformed from chemical structures. Since most of the drugs selected in this study are antipsychotics classified as drugs for the nervous system, and they predicted several uses of drugs, including the nervous system, with about 90% accuracy, better results could be obtained by using models that can handle structural information in more detail, such as deep learning models, rather than just simple similarity.</p>
        <p><xref rid="figure8" ref-type="fig">Figure 8</xref> plots the relationship between the number of labeled tweets and the F1 value for each drug in the corpus, indicating that the F1 value increases with the number of tweets. On the other hand, the F1 scores of <italic>NC-s</italic>, unlike those of the other categories, do not depend significantly on the number of tweets, and tweets classified as <italic>NC-s</italic> are similar in content, even if the type of drug mentioned differs. The F1 score for categorizing a tweet as abuse was 0.53 [<xref ref-type="bibr" rid="ref29">29</xref>], which is considered adequate. The overall F1 score was 0.723, which is also a favorable result compared to those in previous studies [<xref ref-type="bibr" rid="ref29">29</xref>]. Since the F1 score reached its peak when the number of tweets with the corresponding label reached approximately 500, we inferred that approximately 500 tweets can serve as a guideline when preparing training data for each query.</p>
        <fig id="figure7" position="float">
          <label>Figure 7</label>
          <caption>
            <p>Comparison of the results of transfer learning for drugs with similar structure and drugs with similar indications.</p>
          </caption>
          <graphic xlink:href="jmir_v25i1e44870_fig7.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure8" position="float">
          <label>Figure 8</label>
          <caption>
            <p>Scatterplot showing the relationship between the number of tweets and F1 value for each drug. G-m: general mention; G-u: general use; NC-s: noncompliant sales; NC-u/m: noncompliant use or mention.</p>
          </caption>
          <graphic xlink:href="jmir_v25i1e44870_fig8.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>In this study, the experiments were conducted using only 20 different types of drugs. The categories of drugs included analgesics, sleeping pills and anxiolytics, antipsychotics and antidepressants, antiallergics, antitussives, antibiotics, antidiabetics, and diuretics. Not all types were covered; expansion of the drug category is a significant issue for the future. Additionally, most drugs were categorized as antipsychotics. This bias may have affected the study results.</p>
        <p>The relatively low interannotator agreement limited the performance of the models. Annotation schemes could be improved to obtain better metrics. Furthermore, the correlations did not necessarily indicate any higher-level associations between structural similarity and metrics from the results.</p>
        <p>We only used the Tanimoto similarity as the structural similarity without considering the 3D structure. Considering that the mechanism of action is based on the 3D structure, the results could be improved by calculating the similarity with the 3D structure. A detailed investigation of this learning method is required.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>In this study, we assessed the usefulness of the structural similarity of drugs by using a corpus annotated with medication noncompliance. It was found that structural similarity can be used for more efficient learning of training data with a limited number of drugs. On the other hand, using a corpus in the case of a new drug introduction or learning in a low-resource language with a small corpus, it is possible to provide a guideline for using training data from drugs with a similar structure. We believe that this can provide a procedure for training data for learning in low-resource languages where the differences are slight, and the corpus is limited.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Annotation guideline of MediA corpus.</p>
        <media xlink:href="jmir_v25i1e44870_app1.pdf" xlink:title="PDF File  (Adobe PDF File), 357 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">ATC</term>
          <def>
            <p>Anatomical Therapeutic Chemical</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">BERT</term>
          <def>
            <p>bidirectional encoder representations from transformers</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">G-m</term>
          <def>
            <p>general mention</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">G-u</term>
          <def>
            <p>general use</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">NC-s</term>
          <def>
            <p>noncompliant sales</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">NC-u/m</term>
          <def>
            <p>noncompliant use or mention</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">OTC</term>
          <def>
            <p>over-the-counter</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">SMILES</term>
          <def>
            <p>simplified molecular input line entry system</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>This work was supported by Japan Science and Technology Agency Center for Advanced Intelligence Project Japanese-German-French Artificial Intelligence Research grant JPMJCR20G9, the National Institute of Informatics Center for Robust Intelligence and Social Technology, and Japan Society for the Promotion of Science KAKENHI grant JP21H03170.</p>
    </ack>
    <fn-group>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <notes>
      <sec>
        <title>Data Availability</title>
        <p>The data sets generated or analyzed during this study are available [<xref ref-type="bibr" rid="ref39">39</xref>].</p>
      </sec>
    </notes>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Miller</surname>
              <given-names>TA</given-names>
            </name>
          </person-group>
          <article-title>Health literacy and adherence to medical treatment in chronic and acute illness: a meta-analysis</article-title>
          <source>Patient Educ Couns</source>
          <year>2016</year>
          <volume>99</volume>
          <issue>7</issue>
          <fpage>1079</fpage>
          <lpage>1086</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/26899632"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.pec.2016.01.020</pub-id>
          <pub-id pub-id-type="medline">26899632</pub-id>
          <pub-id pub-id-type="pii">S0738-3991(16)30041-6</pub-id>
          <pub-id pub-id-type="pmcid">PMC4912447</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Long</surname>
              <given-names>CS</given-names>
            </name>
            <name name-style="western">
              <surname>Kumaran</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Goh</surname>
              <given-names>KW</given-names>
            </name>
            <name name-style="western">
              <surname>Bakrin</surname>
              <given-names>FS</given-names>
            </name>
            <name name-style="western">
              <surname>Ming</surname>
              <given-names>LC</given-names>
            </name>
            <name name-style="western">
              <surname>Rehman</surname>
              <given-names>IU</given-names>
            </name>
            <name name-style="western">
              <surname>Dhaliwal</surname>
              <given-names>JS</given-names>
            </name>
            <name name-style="western">
              <surname>Hadi</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Sim</surname>
              <given-names>YW</given-names>
            </name>
            <name name-style="western">
              <surname>Tan</surname>
              <given-names>CS</given-names>
            </name>
          </person-group>
          <article-title>Online pharmacies selling prescription drugs: systematic review</article-title>
          <source>Pharmacy</source>
          <year>2022</year>
          <volume>10</volume>
          <issue>2</issue>
          <fpage>42</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.mdpi.com/resolver?pii=pharmacy10020042"/>
          </comment>
          <pub-id pub-id-type="doi">10.3390/pharmacy10020042</pub-id>
          <pub-id pub-id-type="medline">35448701</pub-id>
          <pub-id pub-id-type="pii">pharmacy10020042</pub-id>
          <pub-id pub-id-type="pmcid">PMC9031186</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Onishi</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Weissenbacher</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Klein</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>O'Connor</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Gonzalez-Hernandez</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Dealing with medication non-adherence expressions in Twitter</article-title>
          <source>Proceedings of the 2018 EMNLP Workshop SMM4H</source>
          <year>2018</year>
          <conf-name>The 3rd Social Media Mining for Health Applications Workshop &amp; Shared Task</conf-name>
          <conf-date>October 31, 2018</conf-date>
          <conf-loc>Brussels, Belgium</conf-loc>
          <publisher-name>Association for Computational Linguistics</publisher-name>
          <fpage>32</fpage>
          <lpage>33</lpage>
          <pub-id pub-id-type="doi">10.18653/v1/w18-5908</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bhattacharya</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Snyder</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Malin</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Truffa</surname>
              <given-names>MM</given-names>
            </name>
            <name name-style="western">
              <surname>Marinic</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Engelmann</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Raheja</surname>
              <given-names>RR</given-names>
            </name>
          </person-group>
          <article-title>Using social media data in routine pharmacovigilance: a pilot study to identify safety signals and patient perspectives</article-title>
          <source>Pharm Med</source>
          <year>2017</year>
          <volume>31</volume>
          <issue>3</issue>
          <fpage>167</fpage>
          <lpage>174</lpage>
          <pub-id pub-id-type="doi">10.1007/s40290-017-0186-6</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Xie</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Zeng</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Fang</surname>
              <given-names>X</given-names>
            </name>
          </person-group>
          <article-title>Understanding reasons for medication nonadherence: an exploration in social media using sentiment-enriched deep learning approach</article-title>
          <source>Proceedings of the International Conference on Information Systems - Transforming Society with Digital Innovation</source>
          <year>2017</year>
          <conf-name>38th ICIS 2017</conf-name>
          <conf-date>December 10-13, 2017</conf-date>
          <conf-loc>Seoul, South Korea</conf-loc>
          <publisher-name>Association for Information Systems</publisher-name>
          <pub-id pub-id-type="doi">10.2139/ssrn.3091923</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Golder</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>O'Connor</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Hennessy</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Gross</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Gonzalez-Hernandez</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Assessment of beliefs and attitudes about statins posted on Twitter: a qualitative study</article-title>
          <source>JAMA Netw Open</source>
          <year>2020</year>
          <volume>3</volume>
          <issue>6</issue>
          <fpage>e208953</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/32584408"/>
          </comment>
          <pub-id pub-id-type="doi">10.1001/jamanetworkopen.2020.8953</pub-id>
          <pub-id pub-id-type="medline">32584408</pub-id>
          <pub-id pub-id-type="pii">2767638</pub-id>
          <pub-id pub-id-type="pmcid">PMC7317605</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sarker</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Gonzalez-Hernandez</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Ruan</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Perrone</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Machine learning and natural language processing for geolocation-centric monitoring and characterization of opioid-related social media chatter</article-title>
          <source>JAMA Netw Open</source>
          <year>2019</year>
          <volume>2</volume>
          <issue>11</issue>
          <fpage>e1914672</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/31693125"/>
          </comment>
          <pub-id pub-id-type="doi">10.1001/jamanetworkopen.2019.14672</pub-id>
          <pub-id pub-id-type="medline">31693125</pub-id>
          <pub-id pub-id-type="pii">2753983</pub-id>
          <pub-id pub-id-type="pmcid">PMC6865282</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ru</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Harris</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Yao</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>A content analysis of patient-reported medication outcomes on social media</article-title>
          <year>2015</year>
          <conf-name>2015 IEEE International Conference on Data Mining Workshop (ICDMW)</conf-name>
          <conf-date>November 14-17, 2015</conf-date>
          <conf-loc>Atlantic City, NJ</conf-loc>
          <fpage>472</fpage>
          <lpage>479</lpage>
          <pub-id pub-id-type="doi">10.1109/icdmw.2015.150</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sinnenberg</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>DiSilvestro</surname>
              <given-names>CL</given-names>
            </name>
            <name name-style="western">
              <surname>Mancheno</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Dailey</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Tufts</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Buttenheim</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Barg</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Ungar</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Schwartz</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Brown</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Asch</surname>
              <given-names>DA</given-names>
            </name>
            <name name-style="western">
              <surname>Merchant</surname>
              <given-names>RM</given-names>
            </name>
          </person-group>
          <article-title>Twitter as a potential data source for cardiovascular disease research</article-title>
          <source>JAMA Cardiol</source>
          <year>2016</year>
          <volume>1</volume>
          <issue>9</issue>
          <fpage>1032</fpage>
          <lpage>1036</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/27680322"/>
          </comment>
          <pub-id pub-id-type="doi">10.1001/jamacardio.2016.3029</pub-id>
          <pub-id pub-id-type="medline">27680322</pub-id>
          <pub-id pub-id-type="pii">2556216</pub-id>
          <pub-id pub-id-type="pmcid">PMC5177459</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gkotsis</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Mueller</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Dobson</surname>
              <given-names>RJB</given-names>
            </name>
            <name name-style="western">
              <surname>Hubbard</surname>
              <given-names>TJB</given-names>
            </name>
            <name name-style="western">
              <surname>Dutta</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Mining social media data to study the consequences of dementia diagnosis on caregivers and relatives</article-title>
          <source>Dement Geriatr Cogn Disord</source>
          <year>2020</year>
          <volume>49</volume>
          <issue>3</issue>
          <fpage>295</fpage>
          <lpage>302</lpage>
          <pub-id pub-id-type="doi">10.1159/000509123</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wexler</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Davoudi</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Weissenbacher</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Choi</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>O'Connor</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Cummings</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Gonzalez-Hernandez</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Pregnancy and health in the age of the internet: a content analysis of online "birth club" forums</article-title>
          <source>PLoS One</source>
          <year>2020</year>
          <volume>15</volume>
          <issue>4</issue>
          <fpage>e0230947</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pone.0230947"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0230947</pub-id>
          <pub-id pub-id-type="medline">32287266</pub-id>
          <pub-id pub-id-type="pii">PONE-D-20-00106</pub-id>
          <pub-id pub-id-type="pmcid">PMC7156049</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Beusterien</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Tsay</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Gholizadeh</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Su</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Real-world experience with colorectal cancer chemotherapies: patient web forum analysis</article-title>
          <source>Ecancermedicalscience</source>
          <year>2013</year>
          <volume>7</volume>
          <fpage>361</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/24143155"/>
          </comment>
          <pub-id pub-id-type="doi">10.3332/ecancer.2013.361</pub-id>
          <pub-id pub-id-type="medline">24143155</pub-id>
          <pub-id pub-id-type="pii">can-7-361</pub-id>
          <pub-id pub-id-type="pmcid">PMC3796446</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mao</surname>
              <given-names>JJ</given-names>
            </name>
            <name name-style="western">
              <surname>Chung</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Benton</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Hill</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Ungar</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Leonard</surname>
              <given-names>CE</given-names>
            </name>
            <name name-style="western">
              <surname>Hennessy</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Holmes</surname>
              <given-names>JH</given-names>
            </name>
          </person-group>
          <article-title>Online discussion of drug side effects and discontinuation among breast cancer survivors</article-title>
          <source>Pharmacoepidemiol Drug Saf</source>
          <year>2013</year>
          <volume>22</volume>
          <issue>3</issue>
          <fpage>256</fpage>
          <lpage>262</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/23322591"/>
          </comment>
          <pub-id pub-id-type="doi">10.1002/pds.3365</pub-id>
          <pub-id pub-id-type="medline">23322591</pub-id>
          <pub-id pub-id-type="pmcid">PMC4380018</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Burkhardt</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Siekiera</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Glodde</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Andrade-Navarro</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Kramer</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Towards identifying drug side effects from social media using active learning and crowd sourcing</article-title>
          <year>2019</year>
          <conf-name>Biocomputing 2020: Proceedings of the Pacific Symposium</conf-name>
          <conf-date>January 3-7, 2020</conf-date>
          <conf-loc>Kohala Coast, HI</conf-loc>
          <fpage>319</fpage>
          <lpage>330</lpage>
          <pub-id pub-id-type="doi">10.1142/9789811215636_0029</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rastegar-Mojarad</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Nambisan</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Using social media data to identify potential candidates for drug repurposing: a feasibility study</article-title>
          <source>JMIR Res Protoc</source>
          <year>2016</year>
          <volume>5</volume>
          <issue>2</issue>
          <fpage>e121</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.researchprotocols.org/2016/2/e121/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/resprot.5621</pub-id>
          <pub-id pub-id-type="medline">27311964</pub-id>
          <pub-id pub-id-type="pii">v5i2e121</pub-id>
          <pub-id pub-id-type="pmcid">PMC4929348</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>CC</given-names>
            </name>
          </person-group>
          <article-title>Drug repositioning to accelerate drug development using social media data: computational study on Parkinson disease</article-title>
          <source>J Med Internet Res</source>
          <year>2018</year>
          <volume>20</volume>
          <issue>10</issue>
          <fpage>e271</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2018/10/e271/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/jmir.9646</pub-id>
          <pub-id pub-id-type="medline">30309833</pub-id>
          <pub-id pub-id-type="pii">v20i10e271</pub-id>
          <pub-id pub-id-type="pmcid">PMC6231748</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Weissenbacher</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Sarker</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Klein</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>O'Connor</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Magge</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Gonzalez-Hernandez</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Deep neural networks ensemble for detecting medication mentions in tweets</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2019</year>
          <volume>26</volume>
          <issue>12</issue>
          <fpage>1618</fpage>
          <lpage>1626</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/31562510"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamia/ocz156</pub-id>
          <pub-id pub-id-type="medline">31562510</pub-id>
          <pub-id pub-id-type="pii">5575394</pub-id>
          <pub-id pub-id-type="pmcid">PMC6857507</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sarker</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Gonzalez</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>A corpus for mining drug-related knowledge from Twitter chatter: language models and their utilities</article-title>
          <source>Data Brief</source>
          <year>2017</year>
          <volume>10</volume>
          <fpage>122</fpage>
          <lpage>131</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S2352-3409(16)30716-8"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.dib.2016.11.056</pub-id>
          <pub-id pub-id-type="medline">27981203</pub-id>
          <pub-id pub-id-type="pii">S2352-3409(16)30716-8</pub-id>
          <pub-id pub-id-type="pmcid">PMC5144647</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Weissenbacher</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Ge</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Klein</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>O'Connor</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Gross</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Hennessy</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Gonzalez-Hernandez</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Active neural networks to detect mentions of changes to medication treatment in social media</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2021</year>
          <volume>28</volume>
          <issue>12</issue>
          <fpage>2551</fpage>
          <lpage>2561</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/34613417"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamia/ocab158</pub-id>
          <pub-id pub-id-type="medline">34613417</pub-id>
          <pub-id pub-id-type="pii">6382264</pub-id>
          <pub-id pub-id-type="pmcid">PMC8633624</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Phan</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Chun</surname>
              <given-names>SA</given-names>
            </name>
            <name name-style="western">
              <surname>Bhole</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Geller</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Enabling real-time drug abuse detection in tweets</article-title>
          <year>2017</year>
          <conf-name>2017 IEEE 33rd International Conference on Data Engineering</conf-name>
          <conf-date>April 19-22, 2017</conf-date>
          <conf-loc>San Diego, CA</conf-loc>
          <fpage>1510</fpage>
          <lpage>1514</lpage>
          <pub-id pub-id-type="doi">10.1109/icde.2017.221</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ginart</surname>
              <given-names>AA</given-names>
            </name>
            <name name-style="western">
              <surname>Das</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Harris</surname>
              <given-names>JK</given-names>
            </name>
            <name name-style="western">
              <surname>Wong</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Yan</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Krauss</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Cavazos-Rehg</surname>
              <given-names>PA</given-names>
            </name>
          </person-group>
          <article-title>Drugs or dancing? Using real-time machine learning to classify streamed “dabbing” homograph tweets</article-title>
          <year>2016</year>
          <conf-name>2016 IEEE International Conference on Healthcare Informatics</conf-name>
          <conf-date>October 4-7, 2016</conf-date>
          <conf-loc>Chicago, IL</conf-loc>
          <fpage>10</fpage>
          <lpage>13</lpage>
          <pub-id pub-id-type="doi">10.1109/ichi.2016.97</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mackey</surname>
              <given-names>TK</given-names>
            </name>
            <name name-style="western">
              <surname>Kalyanam</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Katsuki</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Lanckriet</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Twitter-based detection of illegal online sale of prescription opioid</article-title>
          <source>Am J Public Health</source>
          <year>2017</year>
          <volume>107</volume>
          <issue>12</issue>
          <fpage>1910</fpage>
          <lpage>1915</lpage>
          <pub-id pub-id-type="doi">10.2105/AJPH.2017.303994</pub-id>
          <pub-id pub-id-type="medline">29048960</pub-id>
          <pub-id pub-id-type="pmcid">PMC5678375</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chary</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Genes</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Giraud-Carrier</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Hanson</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Nelson</surname>
              <given-names>LS</given-names>
            </name>
            <name name-style="western">
              <surname>Manini</surname>
              <given-names>AF</given-names>
            </name>
          </person-group>
          <article-title>Epidemiology from tweets: estimating misuse of prescription opioids in the USA from social media</article-title>
          <source>J Med Toxicol</source>
          <year>2017</year>
          <volume>13</volume>
          <issue>4</issue>
          <fpage>278</fpage>
          <lpage>286</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/28831738"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s13181-017-0625-5</pub-id>
          <pub-id pub-id-type="medline">28831738</pub-id>
          <pub-id pub-id-type="pii">10.1007/s13181-017-0625-5</pub-id>
          <pub-id pub-id-type="pmcid">PMC5711756</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Abdellaoui</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Foulquié</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Texier</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Faviez</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Burgun</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Schück</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Detection of cases of noncompliance to drug treatment in patient forum posts: topic model approach</article-title>
          <source>J Med Internet Res</source>
          <year>2018</year>
          <volume>20</volume>
          <issue>3</issue>
          <fpage>e85</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2018/3/e85/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/jmir.9222</pub-id>
          <pub-id pub-id-type="medline">29540337</pub-id>
          <pub-id pub-id-type="pii">v20i3e85</pub-id>
          <pub-id pub-id-type="pmcid">PMC5874436</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Alvarez-Mon</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Donat-Vargas</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Santoma-Vilaclara</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>de Anta</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Goena</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Sanchez-Bayona</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Mora</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Ortega</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Lahera</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Rodriguez-Jimenez</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Quintero</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Álvarez-Mon</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Assessment of antipsychotic medications on social media: machine learning study</article-title>
          <source>Front Psychiatry</source>
          <year>2021</year>
          <volume>12</volume>
          <fpage>737684</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/34867531"/>
          </comment>
          <pub-id pub-id-type="doi">10.3389/fpsyt.2021.737684</pub-id>
          <pub-id pub-id-type="medline">34867531</pub-id>
          <pub-id pub-id-type="pmcid">PMC8637121</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bigeard</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Grabar</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Thiessard</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Detection and analysis of drug misuses: a study based on social media messages</article-title>
          <source>Front Pharmacol</source>
          <year>2018</year>
          <volume>9</volume>
          <fpage>791</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/30140224"/>
          </comment>
          <pub-id pub-id-type="doi">10.3389/fphar.2018.00791</pub-id>
          <pub-id pub-id-type="medline">30140224</pub-id>
          <pub-id pub-id-type="pmcid">PMC6094963</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sarker</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Gonzalez</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Portable automatic text classification for adverse drug reaction detection via multi-corpus training</article-title>
          <source>J Biomed Inform</source>
          <year>2015</year>
          <volume>53</volume>
          <fpage>196</fpage>
          <lpage>207</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(14)00231-7"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2014.11.002</pub-id>
          <pub-id pub-id-type="medline">25451103</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(14)00231-7</pub-id>
          <pub-id pub-id-type="pmcid">PMC4355323</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bobicev</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Sokolova</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <person-group person-group-type="editor">
            <name name-style="western">
              <surname>Mouhoub</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Langlais</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Confused and thankful: multi-label sentiment classification of health forums</article-title>
          <source>Advances in Artificial Intelligence</source>
          <year>2017</year>
          <publisher-loc>Cham</publisher-loc>
          <publisher-name>Springer</publisher-name>
          <fpage>284</fpage>
          <lpage>289</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>O'Connor</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Sarker</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Perrone</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Gonzalez Hernandez</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Promoting reproducible research for characterizing nonmedical use of medications through data annotation: description of a Twitter corpus and guidelines</article-title>
          <source>J Med Internet Res</source>
          <year>2020</year>
          <volume>22</volume>
          <issue>2</issue>
          <fpage>e15861</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2020/2/e15861/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/15861</pub-id>
          <pub-id pub-id-type="medline">32130117</pub-id>
          <pub-id pub-id-type="pii">v22i2e15861</pub-id>
          <pub-id pub-id-type="pmcid">PMC7066507</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Martin</surname>
              <given-names>YC</given-names>
            </name>
            <name name-style="western">
              <surname>Kofron</surname>
              <given-names>JL</given-names>
            </name>
            <name name-style="western">
              <surname>Traphagen</surname>
              <given-names>LM</given-names>
            </name>
          </person-group>
          <article-title>Do structurally similar molecules have similar biological activity?</article-title>
          <source>J Med Chem</source>
          <year>2002</year>
          <volume>45</volume>
          <issue>19</issue>
          <fpage>4350</fpage>
          <lpage>4358</lpage>
          <pub-id pub-id-type="doi">10.1021/jm020155c</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Meyer</surname>
              <given-names>JG</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Miller</surname>
              <given-names>IJ</given-names>
            </name>
            <name name-style="western">
              <surname>Coon</surname>
              <given-names>JJ</given-names>
            </name>
            <name name-style="western">
              <surname>Gitter</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Learning drug functions from chemical structures with convolutional neural networks and random forests</article-title>
          <source>J Chem Inf Model</source>
          <year>2019</year>
          <volume>59</volume>
          <issue>10</issue>
          <fpage>4438</fpage>
          <lpage>4449</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1021/acs.jcim.9b00236"/>
          </comment>
          <pub-id pub-id-type="doi">10.1021/acs.jcim.9b00236</pub-id>
          <pub-id pub-id-type="medline">31518132</pub-id>
          <pub-id pub-id-type="pmcid">PMC6819987</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cohen</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>A coefficient of agreement for nominal scales</article-title>
          <source>Educ Psychol Meas</source>
          <year>1960</year>
          <volume>20</volume>
          <issue>1</issue>
          <fpage>37</fpage>
          <lpage>46</lpage>
          <pub-id pub-id-type="doi">10.1177/001316446002000104</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Devlin</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>MW</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Toutanova</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>BERT: pre-training of deep bidirectional transformers for language understanding</article-title>
          <year>2019</year>
          <conf-name>Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers)</conf-name>
          <conf-date>June 2-7, 2019</conf-date>
          <conf-loc>Minneapolis, MN</conf-loc>
          <publisher-name>Association for Computational Linguistics</publisher-name>
          <fpage>4171</fpage>
          <lpage>4186</lpage>
          <pub-id pub-id-type="doi">10.18653/v1/N19-1423</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Al-Garadi</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>YC</given-names>
            </name>
            <name name-style="western">
              <surname>Cai</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Ruan</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>O'Connor</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Gonzalez-Hernandez</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Perrone</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Sarker</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Text classification models for the automatic detection of nonmedical prescription medication use from social media</article-title>
          <source>BMC Med Inform Decis Mak</source>
          <year>2021</year>
          <volume>21</volume>
          <issue>1</issue>
          <fpage>27</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedinformdecismak.biomedcentral.com/articles/10.1186/s12911-021-01394-0"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12911-021-01394-0</pub-id>
          <pub-id pub-id-type="medline">33499852</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12911-021-01394-0</pub-id>
          <pub-id pub-id-type="pmcid">PMC7835447</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tassone</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Yan</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Simpson</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Mendhe</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Mago</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Choudhury</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Utilizing deep learning and graph mining to identify drug use on Twitter data</article-title>
          <source>BMC Med Inform Decis Mak</source>
          <year>2020</year>
          <volume>20</volume>
          <issue>suppl 11</issue>
          <fpage>304</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedinformdecismak.biomedcentral.com/articles/10.1186/s12911-020-01335-3"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12911-020-01335-3</pub-id>
          <pub-id pub-id-type="medline">33380324</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12911-020-01335-3</pub-id>
          <pub-id pub-id-type="pmcid">PMC7772918</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tanimoto</surname>
              <given-names>TT</given-names>
            </name>
          </person-group>
          <source>An Elementary Mathematical Theory of Classification and Prediction</source>
          <year>1958</year>
          <publisher-loc>New York</publisher-loc>
          <publisher-name>International Business Machines Corporation</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Weininger</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>SMILES, a chemical language and information system. 1. Introduction to methodology and encoding rules</article-title>
          <source>J Chem Inf Comput Sci</source>
          <year>1988</year>
          <volume>28</volume>
          <issue>1</issue>
          <fpage>31</fpage>
          <lpage>36</lpage>
          <pub-id pub-id-type="doi">10.1021/ci00057a005</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jo</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Choi</surname>
              <given-names>HS</given-names>
            </name>
            <name name-style="western">
              <surname>Yoon</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Prediction of drug classes with a deep neural network using drug targets and chemical structure data</article-title>
          <year>2019</year>
          <conf-name>2019 IEEE International Conference on Bioinformatics and Biomedicine (BIBM)</conf-name>
          <conf-date>November 18-21, 2019</conf-date>
          <conf-loc>San Diego, CA</conf-loc>
          <fpage>664</fpage>
          <lpage>667</lpage>
          <pub-id pub-id-type="doi">10.1109/bibm47256.2019.8983104</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="web">
          <article-title>MediACorpus_20211216</article-title>
          <source>NAIST Social Computing Lab</source>
          <access-date>2023-04-20</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://sociocom.naist.jp/download/mediacorpus_20211216">https://sociocom.naist.jp/download/mediacorpus_20211216</ext-link>
          </comment>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
