<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMIR</journal-id>
      <journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id>
      <journal-title>Journal of Medical Internet Research</journal-title>
      <issn pub-type="epub">1438-8871</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v26i1e46176</article-id>
      <article-id pub-id-type="pmid">38888956</article-id>
      <article-id pub-id-type="doi">10.2196/46176</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Monitoring Adverse Drug Events in Web Forums: Evaluation of a Pipeline and Use Case Study</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Mavragani</surname>
            <given-names>Amaryllis</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Matsuda</surname>
            <given-names>Shinichi</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Shang</surname>
            <given-names>Lili</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author">
          <name name-style="western">
            <surname>Karapetiantz</surname>
            <given-names>Pierre</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-6486-9838</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Audeh</surname>
            <given-names>Bissan</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-8550-8724</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Redjdal</surname>
            <given-names>Akram</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-3141-5463</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Tiffet</surname>
            <given-names>Théophile</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-2975-0553</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Bousquet</surname>
            <given-names>Cédric</given-names>
          </name>
          <degrees>PhD, PharmD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-9775-2476</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Jaulent</surname>
            <given-names>Marie-Christine</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Inserm</institution>
            <institution>Sorbonne Université</institution>
            <institution>université Paris 13, Laboratoire d’informatique médicale et d’ingénierie des connaissances en e-santé, LIMICS, F-75006</institution>
            <addr-line>15 rue de l'école de Médecine</addr-line>
            <addr-line>Paris, 75006</addr-line>
            <country>France</country>
            <phone>33 144279108</phone>
            <email>marie-christine.jaulent@inserm.fr</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-4445-7494</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Inserm</institution>
        <institution>Sorbonne Université</institution>
        <institution>université Paris 13, Laboratoire d’informatique médicale et d’ingénierie des connaissances en e-santé, LIMICS, F-75006</institution>
        <addr-line>Paris</addr-line>
        <country>France</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Service de santé publique et information médicale</institution>
        <institution>CHU de Saint Etienne</institution>
        <addr-line>42000 Saint-Etienne</addr-line>
        <country>France</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Institut National de la Santé et de la Recherche Médicale, Université Jean Monnet, SAnté INgéniérie BIOlogie St-Etienne, SAINBIOSE</institution>
        <addr-line>42270 Saint-Priest-en-Jarez</addr-line>
        <country>France</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Marie-Christine Jaulent <email>marie-christine.jaulent@inserm.fr</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <year>2024</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>18</day>
        <month>6</month>
        <year>2024</year>
      </pub-date>
      <volume>26</volume>
      <elocation-id>e46176</elocation-id>
      <history>
        <date date-type="received">
          <day>1</day>
          <month>2</month>
          <year>2023</year>
        </date>
        <date date-type="rev-request">
          <day>6</day>
          <month>7</month>
          <year>2023</year>
        </date>
        <date date-type="rev-recd">
          <day>20</day>
          <month>10</month>
          <year>2023</year>
        </date>
        <date date-type="accepted">
          <day>12</day>
          <month>3</month>
          <year>2024</year>
        </date>
      </history>
      <copyright-statement>©Pierre Karapetiantz, Bissan Audeh, Akram Redjdal, Théophile Tiffet, Cédric Bousquet, Marie-Christine Jaulent. Originally published in the Journal of Medical Internet Research (https://www.jmir.org), 18.06.2024.</copyright-statement>
      <copyright-year>2024</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research, is properly cited. The complete bibliographic information, a link to the original publication on https://www.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://www.jmir.org/2024/1/e46176" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>To mitigate safety concerns, regulatory agencies must make informed decisions regarding drug usage and adverse drug events (ADEs). The primary pharmacovigilance data stem from spontaneous reports by health care professionals. However, underreporting poses a notable challenge within the current system. Explorations into alternative sources, including electronic patient records and social media, have been undertaken. Nevertheless, social media’s potential remains largely untapped in real-world scenarios.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>The challenge faced by regulatory agencies in using social media is primarily attributed to the absence of suitable tools to support decision makers. An effective tool should enable access to information via a graphical user interface, presenting data in a user-friendly manner rather than in their raw form. This interface should offer various visualization options, empowering users to choose representations that best convey the data and facilitate informed decision-making. Thus, this study aims to assess the potential of integrating social media into pharmacovigilance and enhancing decision-making with this novel data source. To achieve this, our objective was to develop and assess a pipeline that processes data from the extraction of web forum posts to the generation of indicators and alerts within a visual and interactive environment. The goal was to create a user-friendly tool that enables regulatory authorities to make better-informed decisions effectively.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>To enhance pharmacovigilance efforts, we have devised a pipeline comprising 4 distinct modules, each independently editable, aimed at efficiently analyzing health-related French web forums. These modules were (1) web forums’ posts extraction, (2) web forums’ posts annotation, (3) statistics and signal detection algorithm, and (4) a graphical user interface (GUI). We showcase the efficacy of the GUI through an illustrative case study involving the introduction of the new formula of Levothyrox in France. This event led to a surge in reports to the French regulatory authority.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>Between January 1, 2017, and February 28, 2021, a total of 2,081,296 posts were extracted from 23 French web forums. These posts contained 437,192 normalized drug-ADE couples, annotated with the Anatomical Therapeutic Chemical (ATC) Classification and Medical Dictionary for Regulatory Activities (MedDRA). The analysis of the Levothyrox new formula revealed a notable pattern. In August 2017, there was a sharp increase in posts related to this medication on social media platforms, which coincided with a substantial uptick in reports submitted by patients to the national regulatory authority during the same period.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>We demonstrated that conducting quantitative analysis using the GUI is straightforward and requires no coding. The results aligned with prior research and also offered potential insights into drug-related matters. Our hypothesis received partial confirmation because the final users were not involved in the evaluation process. Further studies, concentrating on ergonomics and the impact on professionals within regulatory agencies, are imperative for future research endeavors. We emphasized the versatility of our approach and the seamless interoperability between different modules over the performance of individual modules. Specifically, the annotation module was integrated early in the development process and could undergo substantial enhancement by leveraging contemporary techniques rooted in the Transformers architecture. Our pipeline holds potential applications in health surveillance by regulatory agencies or pharmaceutical companies, aiding in the identification of safety concerns. Moreover, it could be used by research teams for retrospective analysis of events.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>pharmacovigilance</kwd>
        <kwd>social media</kwd>
        <kwd>scraper</kwd>
        <kwd>natural language processing</kwd>
        <kwd>signal detection</kwd>
        <kwd>graphical user interface</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <sec>
        <title>Social Media as a Complementary Data Source for Pharmacovigilance</title>
        <p>One primary mission of regulatory agencies such as the FDA (Food and Drug Administration) or the EMA (European Medicines Agency) is to monitor drug usage and adverse drug events (ADEs) to mitigate the risks associated with drugs within the population. This task entails analyzing diverse data sources, including clinical trials, postmarketing surveillance, spontaneous reporting systems, and published scientific literature. Despite the wealth of available data, some ADEs are not always detected promptly, largely because of underreporting. In France, for instance, underreporting was estimated to range between 78% and 99% from 1997 to 2002 [<xref ref-type="bibr" rid="ref1">1</xref>]. To tackle this challenge, several countries have implemented systems allowing patients to report ADEs.</p>
        <p>Additional sources for detecting ADEs have been under exploration, such as electronic patient records [<xref ref-type="bibr" rid="ref2">2</xref>-<xref ref-type="bibr" rid="ref4">4</xref>] and social media platforms [<xref ref-type="bibr" rid="ref5">5</xref>-<xref ref-type="bibr" rid="ref9">9</xref>]. While some argue that social media alone cannot serve as a primary source for signal detection [<xref ref-type="bibr" rid="ref10">10</xref>], it can be viewed as a valuable secondary source for monitoring emerging adverse drug reactions or reinforcing signals previously identified through spontaneous reports stored in traditional pharmacovigilance databases [<xref ref-type="bibr" rid="ref11">11</xref>]. In a prior study by the authors, patient profiles and reported ADEs found in web forums were compared with those in the French Pharmacovigilance Database (FPVD). The forums tended to represent younger patients, more women, less severe cases, and a higher incidence of psychiatric disorder–related ADEs compared with the FPVD [<xref ref-type="bibr" rid="ref12">12</xref>]. Moreover, forums reported a greater number of unexpected ADEs. Over the past decade, several tools for evaluating social media posts have been described in the literature [<xref ref-type="bibr" rid="ref13">13</xref>]. Specifically, effective ADE detection in social media necessitates both quantitative and qualitative analyses of data [<xref ref-type="bibr" rid="ref14">14</xref>].</p>
      </sec>
      <sec>
        <title>Qualitative Approach for Individual Assessment of Posts</title>
        <p>Qualitative assessment entails evaluating whether users’ messages contain pertinent information for an assessment akin to a pharmacovigilance case report. This includes details such as the patient’s age and gender, the severity of the case, the expectedness and timeline of the adverse event, time-to-onset, dechallenge (outcome upon drug withdrawal), and rechallenge (outcome upon drug reintroduction). For instance, GlaxoSmithKline Inc. implemented the qualitative approach Insight Explorer, which facilitates the collection of extensive data for causality and quality assessment. Users can input data including personal information (eg, age range, gender) and product details (eg, name, route of administration, duration of use, dosage). This approach was adapted for the WEB-RADR (Recognizing Adverse Drug Reactions) project to manually construct a gold standard of curated patient-authored text [<xref ref-type="bibr" rid="ref15">15</xref>].</p>
      </sec>
      <sec>
        <title>Quantitative Approach for Monitoring Adverse Drug Events on Social Media</title>
        <p>Quantitative evaluation involves analyzing extracted data using descriptive and analytical statistics, such as signal detection and change-point analysis. Numerous projects have been undertaken to monitor ADEs on social media. One of the earliest projects is the PREDOSE (Prescription Drug Abuse Online Surveillance and Epidemiology) project [<xref ref-type="bibr" rid="ref5">5</xref>], which investigates the illicit use of pharmaceutical opioids reported in web forums. While the PREDOSE project showcased the potential of leveraging social media for opioid monitoring, notable limitations are the lack of deidentification and signal detection methods. MedWatcher Social, a monitoring platform for health-related web forums, Twitter, and Facebook, represents a prototype application developed in 2014 [<xref ref-type="bibr" rid="ref16">16</xref>]. Yeleswarapu et al [<xref ref-type="bibr" rid="ref6">6</xref>] outlined a semiautomatic pipeline that applies natural language processing (NLP) tasks to extract ADEs from MEDLINE abstracts and user comments from health-related websites. However, this pipeline was not intended for routine use.</p>
        <p>The Domino’s interface [<xref ref-type="bibr" rid="ref17">17</xref>], developed in 2018 by the University of Bordeaux in France and funded by the French Medicines Agency (Agence nationale de sécurité du médicament et des produits de santé [ANSM]), was designed to analyze drug misuse in health-related web forums using NLP methods and the summary of product characteristics. Initially tailored for antidepressant drugs, this tool does not primarily focus on ADE surveillance.</p>
        <p>Another pipeline, described by Nikfarjam et al in 2019 [<xref ref-type="bibr" rid="ref7">7</xref>], used a neural network–based named entity recognition system specifically designed for user-generated content in social media. This platform is dedicated to identifying the association of cutaneous ADEs with cancer therapy drugs. The study focused on a selection of drugs and only examined 8 ADEs.</p>
        <p>Magge et al [<xref ref-type="bibr" rid="ref8">8</xref>] described a pipeline aimed at the extraction and normalization of adverse drug mentions on Twitter. Their pipeline consisted of an ADE classifier designed to identify tweets mentioning an ADE, which were then mapped to a MedDRA (Medical Dictionary for Regulatory Activities Terminology) code. However, the normalization process was confined to the ADEs present in the training set. Neither Nikfarjam’s nor Magge’s pipeline provides a graphical user interface.</p>
        <p>Some private companies also offer tools for analyzing social media for pharmacovigilance purposes. For instance, the DETECT platform was developed as part of a collaborative project in France by Kappa Santé [<xref ref-type="bibr" rid="ref18">18</xref>]. This system enabled the labeling of posts with known controlled vocabulary concepts, and signal detection was conducted [<xref ref-type="bibr" rid="ref19">19</xref>]. Within the scope of this project, Expert System Company implemented BIOPHARMA Navigator to extract web forum posts, while the Luxid Annotation Server provided web services for the automatic annotation of posts.</p>
        <p>An important finding from the studies of the last decade is that while regulatory agencies have begun using data sources beyond spontaneous reports, social media has yet to be fully leveraged in real-world settings due to the immaturity of available solutions. These solutions are essentially proofs of concept that lack scalability and are challenging for experts to evaluate routinely, primarily due to the absence of a graphical user interface to present information.</p>
        <p>Our aim was to assess the potential of integrating social media into pharmacovigilance and enhancing decision-making with this novel data source. To achieve this, our objective was to develop and assess a pipeline that processes data from the extraction of web forum posts to the generation of indicators and alerts within a visual and interactive environment. The goal was to create a user-friendly tool that enables regulatory authorities to make better-informed decisions effectively.</p>
        <p>This article presents the design and implementation of our pipeline dedicated to harnessing posts from social media. In addition, we showcase the use of the pipeline through a specific use case, emphasizing the importance of monitoring drugs in social media to better address patients’ expectations.</p>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Overview</title>
        <p>The PHARES project (Pharmacovigilance in Social Networks), funded from 2017 to 2019 by the French ANSM, aimed to develop a software suite (a pipeline) enabling pharmacovigilance users to analyze social networks, particularly messages posted on forums. The objective of the pipeline is to facilitate routine use through continuous post extraction and quantitative data analysis from web forums, specifically tailored for the French language.</p>
        <p>The pipeline is made up of 4 modules, each referring to its own methods (<xref rid="figure1" ref-type="fig">Figure 1</xref>):</p>
        <p>The Scraper module, which extracts posts from forums using a previously developed tool, Vigi4Med (V4M) scraper [<xref ref-type="bibr" rid="ref9">9</xref>], and produces a comma-separated values (CSV) file filled with the texts extracted.</p>
        <p>The Annotation module, which extracts elements of interest from the posts and registers annotations in CSV files, with each line representing an annotation of an ADE or a drug. When a causality relationship is identified, both an ADE and a drug are annotated on the same line.</p>
        <p>The Statistical module, which performs quantitative analysis on the annotated posts, generating numerical data, tables, or figures.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Structure of the PHARES’ pipeline, with modules in blue and data information in red. CSV: comma-separated values; PHARES: Pharmacovigilance in Social Networks.</p>
          </caption>
          <graphic xlink:href="jmir_v26i1e46176_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>The Interface module, which supports query definition and visualization of results.</p>
        <p>The methodology used to evaluate the PHARES pipeline involved comparing its performance with existing platforms mentioned above, in accordance with a set of criteria established with prospective PHARES users. The criteria, specific to each module, are as follows:</p>
        <list list-type="bullet">
          <list-item>
            <p>General level: focus on ADEs, designed for routine usage.</p>
          </list-item>
          <list-item>
            <p>Scraper: collects all posts of a selected website, performs deidentification, allows the extraction of posts from web forums, and is open source.</p>
          </list-item>
          <list-item>
            <p>Statistics: the temporal evolution of posts or annotations is displayed and a change-point analysis (detecting breakpoints) is possible.</p>
          </list-item>
          <list-item>
            <p>Signal detection: allows the application of at least one signal detection method, displays the temporal evolution of the proportional reporting ratio (PRR), and allows a logistic regression–based signal detection method to be performed.</p>
          </list-item>
          <list-item>
            <p>Graphical user interface: has an interface for users.</p>
          </list-item>
        </list>
      </sec>
      <sec>
        <title>Scraper Module</title>
        <p>V4M Scraper is an open-source tool designed for data extraction from web forums [<xref ref-type="bibr" rid="ref9">9</xref>]. Its primary functions are optimizing scraping time, filtering out posts primarily focused on advertisements, and structuring the extracted data semantically. The module operates by taking a configuration file as input, which contains the URL of the targeted forum. The algorithm navigates through forum pages and generates resource description framework (RDF) triplets for each extracted element, allowing for potential alignment with external semantic resources. A caching mechanism has been integrated into this tool to maintain a local copy of previously visited pages, thereby avoiding redundant requests to websites for already scraped web pages (for example, when recovering from errors or during testing). V4M Scraper was customized for the PHARES project, as indicated by the red elements in Figure S1 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>. The database format (Figure S2 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>) was implemented to enhance interaction with the interface. Specifically, the main scraping script was adjusted to produce a simplified tabular format (CSV) of the extracted data and to store these data in a database. This modification aims to facilitate input to the subsequent module of the pipeline (annotation). V4M Scraper was customized to enable a continuous scraping routine, wherein data extracted from web forums are automatically and regularly annotated and registered. A log file was integrated into the scraper structure to maintain a record of the last scraped element. This log file ensures that the daily routine scraping always begins from the last scraped point. An automation tool (crontab) is used to schedule the execution of the pipeline for each forum on a daily basis at a specific time.</p>
        <p>A total of 23 public French health-related web forums were selected through a combination of Google searches and from a list of certified health websites provided by the HON Foundation, in collaboration with the French National Health Authority (HAS). The selection criteria included the requirement for websites to be hosted in France, feature a discussion board or space for sharing experiences, and have more than 10 patient contributions. Furthermore, Twitter posts are collected and analyzed by the pipeline. This is achieved using the Twitter API for data collection, followed by employing the same modules used for processing web forum posts.</p>
      </sec>
      <sec>
        <title>Annotation Module</title>
        <p>Entities corresponding to drugs and pathological conditions in social media were identified and annotated using an NLP pipeline [<xref ref-type="bibr" rid="ref20">20</xref>]. First, conditional random fields were used to account for global dependencies [<xref ref-type="bibr" rid="ref21">21</xref>]. Specifically, the model considers the entire sequence when making predictions for individual tokens. This approach is advantageous for entity extraction tasks, as the presence of an entity in one part of the text can influence the likelihood of other entities in the vicinity. Second, a support vector machine is used to predict the causality relationship between an entity identified as a drug and another entity identified as an ADE. The annotation method used in this module was implemented at an early stage of the pipeline’s design. Currently, the named entity recognition task of this module is undergoing revision to incorporate more recent advancements in NLP algorithms [<xref ref-type="bibr" rid="ref22">22</xref>-<xref ref-type="bibr" rid="ref26">26</xref>].</p>
        <p>In a third step, the detected annotations were normalized using codes from the MedDRA and the Anatomical Therapeutic Chemical (ATC) Classification to ensure they were suitable for signal detection purposes.</p>
        <p>MedDRA is an international medical hierarchical terminology comprising 5 levels used to code potential ADEs in pharmacovigilance. The highest level is the system organ class, which is further divided into high-level group terms, then into high-level terms, preferred terms (PTs), and finally lowest level terms. Typically, the PT level is used in pharmacovigilance signal detection.</p>
        <p>The ATC classification system is a drug classification used in France for pharmacovigilance purposes. It categorizes the active ingredients of drugs based on the organ system they primarily affect. The classification comprises 5 levels: the anatomical main group (consisting of 14 main groups), the therapeutic subgroup, the therapeutic/pharmacological subgroup, the chemical/therapeutic/pharmacological subgroup, and the chemical substance. Typically, the fifth level (chemical substance) is used in pharmacovigilance signal detection.</p>
        <p>The outputs of the annotation module are CSV files with the following variables:</p>
        <list list-type="bullet">
          <list-item>
            <p>Concerning the post: forum name, post ID, and date</p>
          </list-item>
          <list-item>
            <p>Concerning the ADE: verbatim, normalized term, Unified Medical Language System’s concept unique identifier, and MedDRA code</p>
          </list-item>
          <list-item>
            <p>Concerning the drug: verbatim, normalized term, active ingredient, and ATC code</p>
          </list-item>
        </list>
        <p>In these CSV files, each line can consist of either an ADE annotation, a drug annotation, or both when a causality relationship has been identified between the drug and the ADE. <xref ref-type="table" rid="table1">Table 1</xref> provides a sample of the database.</p>
        <p>In a prior study, we selected posts where at least one ADE associated with 6 drugs (agomelatine, baclofen, duloxetine, exenatide, strontium ranelate, and tetrazepam) had been detected by this algorithm. A manual review revealed that among 5149 posts, 1284 (24.94%) were validated as pharmacovigilance cases [<xref ref-type="bibr" rid="ref12">12</xref>]. The fundamental metrics used to assess the performance of the annotation module were precision (P), recall (R), and their harmonic mean <italic>F</italic><sub>1</sub>-score. To calculate these metrics, it is necessary to evaluate false negatives for nonrecognition of relevant terms, false positives for irrelevant recognitions, and true positives for correct recognitions. Precision, recall, and <italic>F</italic><sub>1</sub>-score are defined as follows:</p>
        <disp-formula>Precision = (true positive)/(true positive + false positive); recall = (true positive)/(true positive + false negative); <italic>F</italic><sub>1</sub>-score = (2 × precision × recall)/(precision + recall) <bold>(1)</bold></disp-formula>
        <p>In the “Results” section, we present a comparison of the performance of the annotation module with the performance of state-of-the-art methods [<xref ref-type="bibr" rid="ref8">8</xref>,<xref ref-type="bibr" rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref25">25</xref>,<xref ref-type="bibr" rid="ref26">26</xref>].</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Sample of the database after annotation and normalization; 8 lines corresponding to 8 annotated couples in the same post.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="60"/>
            <col width="50"/>
            <col width="70"/>
            <col width="70"/>
            <col width="100"/>
            <col width="90"/>
            <col width="100"/>
            <col width="100"/>
            <col width="100"/>
            <col width="100"/>
            <col width="80"/>
            <col width="80"/>
            <thead>
              <tr valign="bottom">
                <td>Forum name</td>
                <td>Post ID</td>
                <td>Date</td>
                <td>Time</td>
                <td>ADE<sup>a</sup> verbatim</td>
                <td>ADE normalized</td>
                <td>Concept unique identifier</td>
                <td>Drug verbatim</td>
                <td>Drug normalized</td>
                <td>Active ingredient</td>
                <td>MedDRA<sup>b</sup> code</td>
                <td>ATC<sup>c</sup> code</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Atoute</td>
                <td>7354</td>
                <td>October 8, 2018</td>
                <td>21:37:00</td>
                <td>Maux de tête</td>
                <td>Céphalée</td>
                <td>C0018681</td>
                <td>Lévothyrox</td>
                <td>LEVOTHYROX</td>
                <td>Levothyroxine sodique</td>
                <td>—<sup>d</sup></td>
                <td>H03AA01</td>
              </tr>
              <tr valign="top">
                <td>Atoute</td>
                <td>7354</td>
                <td>October 8, 2018</td>
                <td>21:37:00</td>
                <td>Maux de tête</td>
                <td>Céphalée</td>
                <td>C0018681</td>
                <td>Calcium</td>
                <td>—</td>
                <td>—</td>
                <td>—</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>Atoute</td>
                <td>7354</td>
                <td>October 8, 2018</td>
                <td>21:37:00</td>
                <td>Nodules cancereux</td>
                <td>—</td>
                <td>—</td>
                <td>Lévothyrox</td>
                <td>LEVOTHYROX</td>
                <td>Levothyroxine sodique</td>
                <td>—</td>
                <td>H03AA01</td>
              </tr>
              <tr valign="top">
                <td>Atoute</td>
                <td>7354</td>
                <td>October 8, 2018</td>
                <td>21:37:00</td>
                <td>Nodules cancereux</td>
                <td>—</td>
                <td>—</td>
                <td>Calcium</td>
                <td>—</td>
                <td>—</td>
                <td>—</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>Atoute</td>
                <td>7354</td>
                <td>October 8, 2018</td>
                <td>21:37:00</td>
                <td>Fatigue</td>
                <td>Fatigue</td>
                <td>C0015672</td>
                <td>Lévothyrox</td>
                <td>LEVOTHYROX</td>
                <td>Levothyroxine sodique</td>
                <td>10016256</td>
                <td>H03AA01</td>
              </tr>
              <tr valign="top">
                <td>Atoute</td>
                <td>7354</td>
                <td>October 8, 2018</td>
                <td>21:37:00</td>
                <td>fatigue</td>
                <td>Fatigue</td>
                <td>C0015672</td>
                <td>Calcium</td>
                <td>—</td>
                <td>—</td>
                <td>10016256</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>Atoute</td>
                <td>7354</td>
                <td>October 8, 2018</td>
                <td>21:37:00</td>
                <td>Perte de poids</td>
                <td>Poids diminué</td>
                <td>C0043096</td>
                <td>Lévothyrox</td>
                <td>LEVOTHYROX</td>
                <td>Levothyroxine sodique</td>
                <td>10048061</td>
                <td>H03AA01</td>
              </tr>
              <tr valign="top">
                <td>Atoute</td>
                <td>7354</td>
                <td>October 8, 2018</td>
                <td>21:37:00</td>
                <td>Perte de poids</td>
                <td>Poids diminué</td>
                <td>C0043096</td>
                <td>Calcium</td>
                <td>—</td>
                <td>—</td>
                <td>10048061</td>
                <td>—</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>ADE: adverse drug event.</p>
            </fn>
            <fn id="table1fn2">
              <p><sup>b</sup>MedDRA: Medical Dictionary for Regulatory Activities Terminology.</p>
            </fn>
            <fn id="table1fn3">
              <p><sup>c</sup>ATC: Anatomical Therapeutic Chemical classification.</p>
            </fn>
            <fn id="table1fn4">
              <p><sup>d</sup>No data are available for this slot.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Statistical Module</title>
        <p>This module generates general statistics and diagrams for web forums or Twitter. It provides data such as the number of annotated posts (related to the drug, the ADE, or both), the count of drug-ADE pairs identified, and the distribution of ADEs’ MedDRA-PTs. In addition, a change-point analysis method was used to detect significant changes over time in the mean number of posts mentioning the drug and ADE [<xref ref-type="bibr" rid="ref27">27</xref>].</p>
        <p>Besides, several statistical signal detection methods were implemented to generate potential signals. Safety signals, which provide information on adverse events that may potentially be caused by a medicine, were further evaluated by pharmacovigilance experts to determine the causal relationship between the medicine and the reported adverse event.</p>
        <p>The statistical module implements 3 signal detection methods, including 2 well-known and frequently used disproportionality signal detection methods: the PRR [<xref ref-type="bibr" rid="ref28">28</xref>] and the reporting odds ratio (ROR) [<xref ref-type="bibr" rid="ref29">29</xref>]. In addition, a complementary method, a logistic regression–based signal detection method known as the class imbalanced subsampling lasso [<xref ref-type="bibr" rid="ref30">30</xref>], was used.</p>
        <p>PRR and ROR are akin to a relative risk and an odds ratio, respectively. However, they differ in their denominators: as the number of exposed patients is typically unknown in pharmacovigilance databases, the denominator in PRR and ROR calculations is the number of cases reported in the pharmacovigilance database.</p>
        <p>PRR and ROR are specific to each drug-ADE pair and can be directly computed from the contingency table (<xref ref-type="table" rid="table2">Table 2</xref>).</p>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Contingency table for disproportionality analysis.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="330"/>
            <col width="340"/>
            <col width="330"/>
            <thead>
              <tr valign="top">
                <td/>
                <td>Adverse drug event of interest</td>
                <td>Other adverse drug events</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Drug of interest</td>
                <td>
                  <italic>a</italic>
                </td>
                <td>
                  <italic>b</italic>
                </td>
              </tr>
              <tr valign="top">
                <td>Other drugs</td>
                <td>
                  <italic>c</italic>
                </td>
                <td>
                  <italic>d</italic>
                </td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <p>The PRR compares the proportion of an ADE among all the ADEs reported for a specific drug with the same proportion for all other drugs in the database (Equation 2). A PRR significantly greater than 1 suggests that the ADE is more frequently reported for patients taking the drug of interest, while a PRR equal to 1 suggests independence between the 2 variables.</p>
        <disp-formula>PRR = [a/(a + b)]/[c/(c + d)] <bold>(2)</bold></disp-formula>
        <p>The ROR quantifies the strength of the association between drug administration and the occurrence of the ADE. It represents the ratio of the odds of drug administration when the ADE is present to the odds of drug administration when the ADE is absent (Equation 3). When the 2 events are independent, the ROR equals 1. An ROR significantly greater than 1 suggests that drug administration is associated with the presence of the ADE.</p>
        <disp-formula>ROR = <italic>ad</italic>/<italic>bc</italic> <bold>(3)</bold></disp-formula>
        <p>We considered events over posts for the calculation of disproportionality statistics. If the same drug-ADE pair was identified multiple times within a post, the pair was counted as many times as it occurred in the calculation.</p>
        <p>Disproportionality analysis has certain limitations, including the confounding effect resulting from coreported drugs and the masking effect, where the background relative reporting rate of an ADE is distorted by extensive reporting on the ADE with a specific drug or drug group. Caster et al [<xref ref-type="bibr" rid="ref31">31</xref>] demonstrated through 2 real case examples how multivariate regression–based approaches can address these issues. Harpaz et al also suggested that logistic regression could be used for safety surveillance [<xref ref-type="bibr" rid="ref32">32</xref>]. Although these regression-based approaches were initially designed for pharmacovigilance case reports, we hypothesize that they may also be applicable to posts.</p>
        <p>The logistic regression model specifically focuses on a particular ADE or a group of ADEs. It involves creating a vector that represents the presence (1) or absence (0) of the ADE of interest in the pharmacovigilance case (in our case, in the post). Additionally, a matrix is generated to represent the administration or nonadministration of all drugs in the database by the patient (1 for administration and 0 for nonadministration). Figure S3 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> illustrates an example of using logistic regression. In our case, we assumed that if a drug was annotated in the post, it was taken by the patient. The logistic regression aims to predict the probability of the presence of the ADE (ADE=1) of interest based on the presence of all (<italic>N<sub>m</sub></italic>) drugs in the database (Equation 4), where <italic>X</italic> represents the distribution of the presence/absence of the drugs. The adjusted factors included only concomitant medications, as patient-related factors are often missing in web forums’ posts. Therefore, we did not need to address the impact of missing data, which should be evaluated when necessary.</p>
        <disp-formula>ln([P(X&#124;ADE=1)]/[P(X&#124;ADE=0)]) = <italic>a</italic> + <italic>b</italic><sub>1</sub> × Drug<sub>1</sub> + ... + <italic>b<sub>i</sub></italic> × Drug<sub>i</sub> + ... + <italic>b<sub>Nm</sub></italic> × Drug<sub>Nm</sub> <bold>(4)</bold></disp-formula>
        <p>The selection of the drugs depends on the parameter <italic>b<sub>i</sub></italic>. If <italic>b<sub>i</sub></italic>&#60;0, the drug <italic>i</italic> decreases the risk of the ADE, and if <italic>b<sub>i</sub></italic>&#62;0, the drug <italic>i</italic> increases the risk of the ADE.</p>
        <p>Then, 2 sets are defined:</p>
        <list list-type="bullet">
          <list-item>
            <p><italic>S</italic><sub>1</sub>: set of <italic>n</italic><sub>1</sub> posts with an annotation of the ADEs of interest.</p>
          </list-item>
          <list-item>
            <p><italic>S</italic><sub>0</sub>: set of <italic>n</italic><sub>0</sub> posts without an annotation of the ADEs of interest.</p>
          </list-item>
        </list>
        <p>In our case <italic>n</italic><sub>0</sub>&#62;&#62;<italic>n</italic><sub>1</sub>, indicating a significant imbalance toward posts lacking annotations of the ADEs of interest. To address this issue, we took a subsample with a more favorable ratio of posts with annotated ADEs versus those without. Additionally, to enhance result stability, we conducted multiple draws instead of just one.</p>
        <p>In practice, we generated <italic>B</italic> subsamples. Each subsample was constructed by randomly drawing, with replacement, <italic>n</italic><sub>1</sub> posts from <italic>S</italic><sub>1</sub> and R posts from <italic>S</italic><sub>0</sub>, where R=max(4<italic>n</italic><sub>1</sub>, 4<italic>N<sub>m</sub></italic>). The choice of 4<italic>n</italic><sub>1</sub> was inspired by case-control studies, while 4<italic>N<sub>m</sub></italic> was included to ensure an adequate number of observations considering the multitude of predictors.</p>
        <p>The maximum number of drug predictors is set to 50 and the method is then applied on <italic>B</italic>=250 drawings. Finally, the distribution of interest is the distribution of the number of times the drug was selected as a predictor (<italic>b<sub>i</sub></italic>&#62;0). The drugs retained as final predictors are those for which the <inline-graphic xlink:href="jmir_v26i1e46176_fig7.png" xlink:type="simple" mimetype="image"/> quantile of this distribution is greater than 0. <inline-graphic xlink:href="jmir_v26i1e46176_fig8.png" xlink:type="simple" mimetype="image"/> can be equal to 5, 10, or 15.</p>
        <p>We implemented a change-point analysis method described in [<xref ref-type="bibr" rid="ref27">27</xref>] to detect whether there was a change in the evolution over time of a chosen statistic, such as the number of a specific drug-ADE pair, the number of ADEs associated with a specific drug, or the number of drugs associated with a specific ADE. The method uses the Cumulative Sum (CUSUM) algorithm to analyze the evolution of statistics over time, comparing current values with the period mean. It identifies breakpoints by calculating the highest difference in statistical values and comparing it with random samples. The process repeats for periods before and after detected breakpoints until no more are found.</p>
      </sec>
      <sec>
        <title>User Interface Module</title>
        <p>The user interface module facilitates user interaction with the pipeline in a user-friendly manner. The interface comprises a dashboard divided into 2 main parts. The left dark column (<xref rid="figure2" ref-type="fig">Figure 2</xref>) serves as a control sidebar, where users can select parameters to filter the data, including the forum, period, drug(s) according to the ATC classification, and ADE(s) according to a level in the MedDRA hierarchy. On the right side of the interface, various visualizations are available, organized into several tabs such as “Forum Statistics” and “Consultation of Posts,” with additional tabs for statistics that become active upon querying.</p>
        <p>Before applying a specific query, the interface provides general information about the currently available data (<xref rid="figure2" ref-type="fig">Figure 2</xref>), including the total annotated posts since 2017 (n=2,081,296) and total annotations since 2017 (n=2,454,310). In addition, a “Consultation of Tweets” tab (not visible in the figure) displays the total annotated tweets since March 2020 (n=46,153).</p>
        <p>Furthermore, several tabs corresponding to different types of statistics, including “Forums Statistics” and “Twitter Statistics,” provide general statistics and diagrams for web forums and Twitter. Examples of these are pie charts showing forum distribution, line charts depicting the evolution of drug and ADE mentions, histograms displaying ADE distribution by system organ class, and line charts illustrating the temporal trend of posts containing the drug and an ADE, as shown in <xref rid="figure3" ref-type="fig">Figures 3</xref> and <xref rid="figure4" ref-type="fig">4</xref>. The “Annotations Plot” tab displays annotations of drugs and adverse effects selected by the user, along with forum information, PTs, high-level terms, high-level group terms, dates, and hours. The “Logistic Regression” tab allows users to choose parameters for applying logistic regression. In the “Disproportionality” tab, users can choose between the PRR and ROR methods, with the time evolution of the chosen method displayed. The “Change-Point” tab enables analysis of temporal evolution, with identified breakpoints indicated. The “Consultation of Posts” and “Consultation of Tweets” tabs provide details on annotated posts/tweets, including downloadable tables. The statistical module performs calculations based on user queries, updating the interface accordingly. If multiple drugs or adverse events are selected, they are treated as new entities for analysis.</p>
        <p>The interface was implemented using the R language and environment (R Foundation) for statistical computing and graphics [<xref ref-type="bibr" rid="ref33">33</xref>], leveraging the Shiny package [<xref ref-type="bibr" rid="ref34">34</xref>] for development.</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Home of the interface before a query. The section on the left allows users to perform a query, while the central section shows the total number of annotated posts since 2017 (n=2,081,296) and the total number of annotations since 2017 (n=2,454,310).</p>
          </caption>
          <graphic xlink:href="jmir_v26i1e46176_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>The “Forum Statistics” tab displaying the central section’s upper part after a query (Paracetamol and dizziness). The image displays the number of posts related to the selected couple, the number of times the selected couple is annotated, a pie chart with the distribution of web forums, and a line chart with the evolution of the mentions of the drug, the ADE, and the couple. ADE: adverse drug event.</p>
          </caption>
          <graphic xlink:href="jmir_v26i1e46176_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Ethical Considerations</title>
        <p>A statement by an Institutional Review Board was not required because we used only publicly available data that do not necessitate Institutional Review Board review.</p>
        <p>This study complied with the European General Data Protection Regulation (GDPR), which has been in force since 2018 in Europe [<xref ref-type="bibr" rid="ref35">35</xref>]. The GDPR enhances the protection of individuals by introducing the right to be informed about the processing of personal data. However, informing each user individually may be impractical. Therefore, the GDPR introduces 2 legal conditions where informed consent is not mandatory, which can be interpreted as supporting the processing of web forum posts for pharmacovigilance (Article 9): “(e) processing relates to personal data which are manifestly made public by the data subject; [. . .] (i) processing is necessary for reasons of public interest in the area of public health, such as [. . .] ensuring high standards of quality and safety of health care and of medicinal products . . ..” The GDPR also requires that data be processed in a manner that “does not permit or no longer permits the identification of data subjects” (Article 89). Deidentification was conducted during the extraction of posts from web forums to ensure privacy [<xref ref-type="bibr" rid="ref9">9</xref>]. User identifiers in the main RDF file were encrypted using the SHA1 algorithm [<xref ref-type="bibr" rid="ref36">36</xref>]. The correspondence between these encrypted identifiers and the original keys is presented in RDF triplets in a separate file, referred to as the “keys file.” Therefore, the only way to retrieve the original authors’ identities is by concatenating the main RDF containing the encrypted data with the keys file, which is kept in a secured location. Moreover, all our data processing was carried out on a secured server with restricted access.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>General Results About the Pipeline</title>
        <p>The primary outcome of this study is the operational PHARES pipeline itself. Daily extraction and annotation of posts are initiated and imported into the database linked to the user interface. In this paper, the platform’s use will be demonstrated through a specific use case on the analysis of Levothyrox ADE mentions in forums (discussed later). In addition, we conducted a comparative analysis of the PHARES pipeline with the existing platforms mentioned in the “Introduction” section, based on the criteria listed in the “Methods” section.</p>
        <p>Of the 10 identified pipelines, half were public and half were private. While 8 out of 10 focused on ADEs, only 4 were designed for routine usage. Five scrapers were open source, and all posts from considered websites were extracted by only 6 of the scrapers (with others extracting posts under certain conditions). Six scraped web forum posts, but only 3 performed deidentification. Additionally, 4 pipelines focused on the French language. A total of 6 pipelines displayed the temporal evolution of the number of posts, but only 1 conducted a change-point analysis. Signal detection methods were performed by only 4 of them, with none displaying the temporal evolution of the PRR or implementing a logistic regression–based method. Finally, 6 of them had an interface (<xref ref-type="table" rid="table3">Table 3</xref>).</p>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>PHARES<sup>a</sup> and identified pipelines’ characteristics match with the identified evaluation criteria<sup>b</sup>.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="150"/>
            <col width="50"/>
            <col width="50"/>
            <col width="80"/>
            <col width="50"/>
            <col width="70"/>
            <col width="50"/>
            <col width="60"/>
            <col width="0"/>
            <col width="60"/>
            <col width="0"/>
            <col width="70"/>
            <col width="70"/>
            <col width="0"/>
            <col width="60"/>
            <col width="70"/>
            <col width="60"/>
            <col width="50"/>
            <thead>
              <tr valign="top">
                <td>Pipeline</td>
                <td colspan="3">General</td>
                <td colspan="5">Scraper</td>
                <td colspan="2">Annotation</td>
                <td colspan="3">Statistics</td>
                <td colspan="4">Signal detection</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Focus on ADEs<sup>c</sup></td>
                <td>Routine usage</td>
                <td>Public/private</td>
                <td>All posts</td>
                <td>Deidentification</td>
                <td>Web forums</td>
                <td>Open source</td>
                <td colspan="2">French language</td>
                <td colspan="2">Temporal evolution</td>
                <td>Change-point analysis</td>
                <td colspan="2">Signal detection</td>
                <td>PRR<sup>d</sup> temporal evolution</td>
                <td>Logistic regression</td>
                <td>Interface</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>PREDOSE<sup>e</sup></td>
                <td>X</td>
                <td>✓</td>
                <td>Public</td>
                <td>✓</td>
                <td>X</td>
                <td>✓</td>
                <td>✓</td>
                <td colspan="2">X</td>
                <td colspan="2">✓</td>
                <td>X</td>
                <td colspan="2">X</td>
                <td>X</td>
                <td>X</td>
                <td>✓</td>
              </tr>
              <tr valign="top">
                <td>Insight Explorer</td>
                <td>✓</td>
                <td>X</td>
                <td>Private</td>
                <td>X</td>
                <td>X</td>
                <td>X</td>
                <td>✓</td>
                <td colspan="2">X</td>
                <td colspan="2">X</td>
                <td>X</td>
                <td colspan="2">X</td>
                <td>X</td>
                <td>X</td>
                <td>✓</td>
              </tr>
              <tr valign="top">
                <td>MedWatcher Social</td>
                <td>✓</td>
                <td>✓</td>
                <td>Public</td>
                <td>X</td>
                <td>X</td>
                <td>✓</td>
                <td>✓</td>
                <td colspan="2">X</td>
                <td colspan="2">✓</td>
                <td>X</td>
                <td colspan="2">✓</td>
                <td>X</td>
                <td>X</td>
                <td>✓</td>
              </tr>
              <tr valign="top">
                <td>Yeleswarapu et al [<xref ref-type="bibr" rid="ref6">6</xref>]</td>
                <td>✓</td>
                <td>X</td>
                <td>Private</td>
                <td>X</td>
                <td>X</td>
                <td>X</td>
                <td>X</td>
                <td colspan="2">X</td>
                <td colspan="2">X</td>
                <td>X</td>
                <td colspan="2">✓</td>
                <td>X</td>
                <td>X</td>
                <td>X</td>
              </tr>
              <tr valign="top">
                <td>Domino</td>
                <td>X</td>
                <td>✓</td>
                <td>Public</td>
                <td>✓</td>
                <td>X</td>
                <td>✓</td>
                <td>✓</td>
                <td colspan="2">✓</td>
                <td colspan="2">✓</td>
                <td>X</td>
                <td colspan="2">X</td>
                <td>X</td>
                <td>X</td>
                <td>✓</td>
              </tr>
              <tr valign="top">
                <td>Nikfarjam et al [<xref ref-type="bibr" rid="ref7">7</xref>]</td>
                <td>✓</td>
                <td>X</td>
                <td>Public and Private</td>
                <td>X</td>
                <td>X</td>
                <td>X</td>
                <td>X</td>
                <td colspan="2">X</td>
                <td colspan="2">X</td>
                <td>X</td>
                <td colspan="2">X</td>
                <td>X</td>
                <td>X</td>
                <td>X</td>
              </tr>
              <tr valign="top">
                <td>Magge et al [<xref ref-type="bibr" rid="ref8">8</xref>]</td>
                <td>✓</td>
                <td>X</td>
                <td>Public</td>
                <td>✓</td>
                <td>X</td>
                <td>X</td>
                <td>✓</td>
                <td colspan="2">X</td>
                <td colspan="2">✓</td>
                <td>X</td>
                <td colspan="2">X</td>
                <td>X</td>
                <td>X</td>
                <td>X</td>
              </tr>
              <tr valign="top">
                <td>ADR-PRISM<sup>f</sup></td>
                <td>✓</td>
                <td>X</td>
                <td>Public and Private</td>
                <td>✓</td>
                <td>✓</td>
                <td>✓</td>
                <td>X</td>
                <td colspan="2">✓</td>
                <td colspan="2">✓</td>
                <td>X</td>
                <td colspan="2">✓</td>
                <td>X</td>
                <td>X</td>
                <td>✓</td>
              </tr>
              <tr valign="top">
                <td>Kappa Santé</td>
                <td>✓</td>
                <td>✓</td>
                <td>Private</td>
                <td>✓</td>
                <td>✓</td>
                <td>✓</td>
                <td>X</td>
                <td colspan="2">✓</td>
                <td colspan="2">✓</td>
                <td>✓</td>
                <td colspan="2">✓</td>
                <td>X</td>
                <td>X</td>
                <td>✓</td>
              </tr>
              <tr valign="top">
                <td>Expert System</td>
                <td>✓</td>
                <td>X</td>
                <td>Private</td>
                <td>✓</td>
                <td>✓</td>
                <td>✓</td>
                <td>X</td>
                <td colspan="2">✓</td>
                <td colspan="2">X</td>
                <td>X</td>
                <td colspan="2">X</td>
                <td>X</td>
                <td>X</td>
                <td>✓</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table3fn1">
              <p><sup>a</sup>PHARES: Pharmacovigilance in Social Networks.</p>
            </fn>
            <fn id="table3fn2">
              <p><sup>b</sup>The X symbol means that the characteristic is missing and the symbol ✓ means the characteristic is fulfilled.</p>
            </fn>
            <fn id="table3fn3">
              <p><sup>c</sup>ADE: adverse drug event.</p>
            </fn>
            <fn id="table3fn4">
              <p><sup>d</sup>PRR: proportional reporting ratio.</p>
            </fn>
            <fn id="table3fn5">
              <p><sup>e</sup>PREDOSE: Prescription Drug Abuse Online Surveillance and Epidemiology.</p>
            </fn>
            <fn id="table3fn6">
              <p><sup>f</sup>ADR-PRISM: Adverse Drug Reaction from Patient Reports in Social Media.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Annotation Module’s Comparison With Up-to-Date State-of-the-Art Methods</title>
        <p>We also compared the performance of our annotation process with those of up-to-date state-of-the-art methods (<xref ref-type="table" rid="table4">Table 4</xref>).</p>
        <p>While the annotation module demonstrated good performance for named entity recognition (<italic>F</italic><sub>1</sub>-score=0.886), it remains slightly below the state of the art. Presently, in medical texts, the best performances are achieved by Hussain et al [<xref ref-type="bibr" rid="ref25">25</xref>] and Ding et al [<xref ref-type="bibr" rid="ref26">26</xref>] for the named entity recognition task, and by Xia [<xref ref-type="bibr" rid="ref22">22</xref>] for the relationship extraction task. On Twitter, known for its notably more complex data, Hussain et al [<xref ref-type="bibr" rid="ref25">25</xref>] achieved slightly better results than our annotator, while Ding et al [<xref ref-type="bibr" rid="ref26">26</xref>] achieved slightly worse results.</p>
        <table-wrap position="float" id="table4">
          <label>Table 4</label>
          <caption>
            <p>Comparison of our annotation process’ performances with up-to-date state-of-the-art methods. Performances are given as precision, recall, and <italic>F</italic><sub>1</sub>-score and are divided into 2 categories<sup>a</sup>.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="120"/>
            <col width="80"/>
            <col width="190"/>
            <col width="190"/>
            <col width="230"/>
            <col width="190"/>
            <thead>
              <tr valign="top">
                <td>Annotator</td>
                <td>Language</td>
                <td>Data</td>
                <td>Natural language processing method</td>
                <td>Named entity recognition (precision; recall; <italic>F</italic><sub>1</sub>-score)</td>
                <td>Relationship extraction (precision; recall; <italic>F</italic><sub>1</sub>-score)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>PHARES<sup>b</sup></td>
                <td>French</td>
                <td>Patient’s web drug review</td>
                <td>Conditional random fields and support vector machines</td>
                <td>0.926; 0.845; 0.886</td>
                <td>0.683; 0.956; 0.797</td>
              </tr>
              <tr valign="top">
                <td>Magge et al [<xref ref-type="bibr" rid="ref8">8</xref>]</td>
                <td>English</td>
                <td>Twitter</td>
                <td>BERT<sup>c</sup> neural networks</td>
                <td>0.82; 0.76; 0.78</td>
                <td>—<sup>d</sup></td>
              </tr>
              <tr valign="top">
                <td>Xia [<xref ref-type="bibr" rid="ref22">22</xref>]</td>
                <td>English</td>
                <td>Medical texts</td>
                <td>HAMLE<sup>e</sup> model</td>
                <td>—</td>
                <td>0.929; 0.914; 0.921</td>
              </tr>
              <tr valign="top">
                <td>Hussain et al [<xref ref-type="bibr" rid="ref25">25</xref>]</td>
                <td>English</td>
                <td>Medical texts (PubMed) and Twitter</td>
                <td>BERT</td>
                <td>0.982; 0.964; 0.976 (PubMed) and 0.840; 0.861; 0.896 (X/Twitter)</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>Ding et al [<xref ref-type="bibr" rid="ref26">26</xref>]</td>
                <td>English</td>
                <td>Medical texts (PubMed) and Twitter</td>
                <td>BGRU<sup>f</sup> + char LSTM<sup>g</sup> attention + auxiliary classifier</td>
                <td>0.867; 0.948; 0.906 (PubMed) and 0.785; 0.914; 0.844 (Twitter)</td>
                <td>—</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table4fn1">
              <p><sup>a</sup>The 2 categories are entity recognition, which is the detection of a drug or ADE mention, and relationship extraction, which is the detection of a relation between a drug and an ADE.</p>
            </fn>
            <fn id="table4fn2">
              <p><sup>b</sup>PHARES: Pharmacovigilance in Social Networks.</p>
            </fn>
            <fn id="table4fn3">
              <p><sup>c</sup>BERT: Bidirectional Encoder Representations from Transformers.</p>
            </fn>
            <fn id="table4fn4">
              <p><sup>d</sup>Not available.</p>
            </fn>
            <fn id="table4fn5">
              <p><sup>e</sup>HAMLE: Historical Awareness Multi-Level Embedding.</p>
            </fn>
            <fn id="table4fn6">
              <p><sup>f</sup>BGRU: Bidirectional Gated Recurrent Unit.</p>
            </fn>
            <fn id="table4fn7">
              <p><sup>g</sup>LSTM: Long Short-Term Memory.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Summary of the Results</title>
        <p>From January 1, 2017, to February 28, 2021, a total of 2,081,296 posts were extracted from 23 French web forums (<xref ref-type="table" rid="table5">Table 5</xref>). We obtained 713,057 normalized annotations of drugs, 1,527,004 normalized annotations of ADEs, and 437,192 annotations of normalized drug-ADE couples. The number of posts annotated with at least one normalized drug-ADE couple was equal to 125,279 (6.02%). <xref ref-type="table" rid="table5">Table 5</xref> summarizes the number of posts extracted per forum, the publication dates, and the description of the web forum. For 1 forum, the publication dates were not available. A total of 9 were generalist health forums, 3 were specialized for parents of a young baby, 2 for families, 3 for mothers, 2 specialized in thyroid issues, 1 for pregnant women, 1 for women, 1 for parents of a teenager or for teenagers, 1 for sports persons, and 1 specialized in rare diseases.</p>
        <table-wrap position="float" id="table5">
          <label>Table 5</label>
          <caption>
            <p>Number of extracted posts per forum, publication dates of the first and last extracted posts, and forums’ descriptions.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="190"/>
            <col width="140"/>
            <col width="170"/>
            <col width="200"/>
            <col width="300"/>
            <thead>
              <tr valign="top">
                <td>Forum</td>
                <td>Extracted posts, n</td>
                <td>Publication date of the first extracted post</td>
                <td>Publication date of the last extracted post</td>
                <td>Description</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>thyroideNEW</td>
                <td>451,253</td>
                <td>February 15, 2001</td>
                <td>February 25, 2021</td>
                <td>Specialized in thyroid issues</td>
              </tr>
              <tr valign="top">
                <td>doctissimoSante</td>
                <td>248,691</td>
                <td>March 19, 2003</td>
                <td>January 16, 2021</td>
                <td>Generalist health forum</td>
              </tr>
              <tr valign="top">
                <td>doctissimoNutrition</td>
                <td>183,730</td>
                <td>December 30, 2002</td>
                <td>January 16, 2021</td>
                <td>Specialized in nutrition</td>
              </tr>
              <tr valign="top">
                <td>infoBebe</td>
                <td>127,341</td>
                <td>November 30, 2000</td>
                <td>March 08, 2019</td>
                <td>Specialized for parents of a young baby</td>
              </tr>
              <tr valign="top">
                <td>atoute</td>
                <td>118,415</td>
                <td>February 05, 2005</td>
                <td>February 28, 2021</td>
                <td>Generalist health forum</td>
              </tr>
              <tr valign="top">
                <td>notreFamille</td>
                <td>97,098</td>
                <td>March 16, 2000</td>
                <td>October 26, 2017</td>
                <td>Specialized for families</td>
              </tr>
              <tr valign="top">
                <td>magicMaman</td>
                <td>96,713</td>
                <td>June 14, 1999</td>
                <td>February 22, 2021</td>
                <td>Specialized for mothers</td>
              </tr>
              <tr valign="top">
                <td>doctissimoMed</td>
                <td>95,531</td>
                <td>August 05, 2002</td>
                <td>January 15, 2021</td>
                <td>Generalist health forum</td>
              </tr>
              <tr valign="top">
                <td>doctissimoGrossesse</td>
                <td>93,449</td>
                <td>November 09, 2006</td>
                <td>January 15, 2021</td>
                <td>Specialized for pregnant women</td>
              </tr>
              <tr valign="top">
                <td>thyroide</td>
                <td>73,376</td>
                <td>September 25, 2001</td>
                <td>January 07, 2019</td>
                <td>Specialized in thyroid issues</td>
              </tr>
              <tr valign="top">
                <td>aufeminin</td>
                <td>72,732</td>
                <td>April 05, 2001</td>
                <td>January 09, 2020</td>
                <td>Specialized for women</td>
              </tr>
              <tr valign="top">
                <td>mamanVie</td>
                <td>69,167</td>
                <td>June 07, 2006</td>
                <td>April 10, 2019</td>
                <td>Specialized for mothers</td>
              </tr>
              <tr valign="top">
                <td>onmeda</td>
                <td>61,428</td>
                <td>July 25, 2001</td>
                <td>February 24, 2021</td>
                <td>Generalist health forum</td>
              </tr>
              <tr valign="top">
                <td>ados</td>
                <td>58,181</td>
                <td>June 20, 2006</td>
                <td>March 08, 2019</td>
                <td>Specialized for parents of a teenager or for teenagers</td>
              </tr>
              <tr valign="top">
                <td>carenity</td>
                <td>52,659</td>
                <td>May 16, 2011</td>
                <td>August 29, 2020</td>
                <td>Generalist health forum</td>
              </tr>
              <tr valign="top">
                <td>famili</td>
                <td>51,844</td>
                <td>November 06, 2000</td>
                <td>November 17, 2019</td>
                <td>Specialized for families</td>
              </tr>
              <tr valign="top">
                <td>babyFrance</td>
                <td>43,806</td>
                <td>January 20, 2003</td>
                <td>April 30, 2018</td>
                <td>Specialized for parents of a young baby</td>
              </tr>
              <tr valign="top">
                <td>bebeMaman</td>
                <td>38,450</td>
                <td>—<sup>a</sup></td>
                <td>—</td>
                <td>Specialized for mothers of a young baby</td>
              </tr>
              <tr valign="top">
                <td>alloDocteurs</td>
                <td>15,833</td>
                <td>June 15, 2009</td>
                <td>February 09, 2021</td>
                <td>Generalist health forum</td>
              </tr>
              <tr valign="top">
                <td>reboot</td>
                <td>9383</td>
                <td>May 04, 2016</td>
                <td>February 25, 2021</td>
                <td>Generalist health forum</td>
              </tr>
              <tr valign="top">
                <td>futura</td>
                <td>6765</td>
                <td>May 12, 2003</td>
                <td>February 22, 2021</td>
                <td>Generalist health forum</td>
              </tr>
              <tr valign="top">
                <td>sportSante</td>
                <td>6350</td>
                <td>May 10, 2011</td>
                <td>January 14, 2020</td>
                <td>Specialized for sportspersons</td>
              </tr>
              <tr valign="top">
                <td>maladieRares</td>
                <td>4827</td>
                <td>October 09, 2012</td>
                <td>May 14, 2020</td>
                <td>Specialized in rare diseases</td>
              </tr>
              <tr valign="top">
                <td>queChoisir</td>
                <td>4250</td>
                <td>June 16, 2003</td>
                <td>February 11, 2021</td>
                <td>Generalist health forum</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table5fn1">
              <p><sup>a</sup>Not available.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Use Case: Analysis of Levothyrox ADE Mentions in Forums</title>
        <p>To demonstrate the usage of the pipeline, we chose to focus on Levothyrox as a case study. Levothyrox is a drug prescribed in France since 1980 for hypothyroidism and circumstances where it is necessary to limit the thyroid-stimulating hormone. In 2017, a new formula of Levothyrox, differing from the 30-year-old drug at the excipient level (with lactose being replaced by mannitol and citric acid in the new formula), was marketed with widespread media coverage. In parallel, an unexpected increase in notifications of ADEs for this drug was detected. Viard et al [<xref ref-type="bibr" rid="ref37">37</xref>] were unable to find any pharmacological rationale to explain that signal. Approximately 32,000 adverse effects were reported by patients in France in 2017, representing 42% of all the ADEs collected yearly [<xref ref-type="bibr" rid="ref38">38</xref>]. Most of these notifications concerned the new formulation of Levothyrox and led to the “French Levothyrox crisis.” In 2017, 1664 notifications of ADEs were spontaneously reported by patients to the Pharmacovigilance Center of Nice. Among the 1544 reviewed notifications, 1372 concerned Levothyrox while only 172 concerned other drugs [<xref ref-type="bibr" rid="ref37">37</xref>].</p>
        <p>In this use case, the study period was from January 1, 2017, to February 28, 2021, and the drugs included were 2 drugs from the “H03AA Thyroid hormones” ATC class: “Levothyroxine sodium” and “associations of levothyroxine and liothyronine.” A total of 17 forums were selected as they included at least one post with information about these drugs. Posts were extracted, annotated, and analyzed through the pipeline from several forums (<xref ref-type="table" rid="table6">Table 6</xref>). Signal detection methods were applied to an ADE chosen as it frequently appeared with Levothyrox in our data: “tiredness.” A signal can be detected when the lower bound of the 95% CI of the logarithm of the PRR is greater than 0. For logistic regression, we applied the tenth quantile. A total of 11,340 posts contained an annotation concerning the drugs of interest. Figure S4 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> illustrates the source and evolution over time of these posts. Out of a total of 50,127 annotations of Levothyrox, they principally originated from the Vivre sans thyroïde forum and were mostly posted in mid-2017 (<xref rid="figure4" ref-type="fig">Figure 4</xref>, <xref ref-type="table" rid="table6">Table 6</xref>). The results of the statistical analysis were displayed by the user interface.</p>
        <p>ADEs annotated with Levothyrox were mainly from system organ classes: general disorders and administration site conditions (29.6%), metabolism and nutrition disorders (11.6%), and endocrine disorders (11.4%). The PTs mostly found in association with Levothyrox are listed in <xref ref-type="table" rid="table7">Table 7</xref>. All this information is accessible in the interface module (Figure S5 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>).</p>
        <p>We chose the PT “tiredness” for the signal detection analysis. A total of 85,976 posts were annotated with either one of the drugs of interest or the ADE tiredness. Among them, 1841 Levothyrox-tiredness couples were found, mostly in 2017 (<xref ref-type="table" rid="table7">Table 7</xref>).</p>
        <p><xref rid="figure5" ref-type="fig">Figure 5</xref> illustrates the time evolution of the PRR for the Levothyrox-tiredness couple. Figure S6 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> displays the source and evolution over time of French web forums’ posts for this couple. A signal is consistently generated throughout the period as the logarithm of the PRR is always greater than 0.</p>
        <fig id="figure4" position="float">
          <label>Figure 4</label>
          <caption>
            <p>The Statistic forum tab showing the central section's lower part after a query (Paracetamol and dizziness). The image displays a histogram of the distribution of ADEs grouped under "Preferred Terms." ADE: adverse drug event.</p>
          </caption>
          <graphic xlink:href="jmir_v26i1e46176_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <table-wrap position="float" id="table6">
          <label>Table 6</label>
          <caption>
            <p>Number of annotations of Levothyrox per forum in 11,340 posts from French web forums from January 1, 2017, to February 28, 2021.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="450"/>
            <col width="230"/>
            <col width="320"/>
            <thead>
              <tr valign="top">
                <td>Forum</td>
                <td>Value, n</td>
                <td>Cumulative frequency, %</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Vivre sans thyroïde</td>
                <td>41,211</td>
                <td>82.21</td>
              </tr>
              <tr valign="top">
                <td>Doctissimo Santé</td>
                <td>4230</td>
                <td>90.65</td>
              </tr>
              <tr valign="top">
                <td>Doctissimo Grossesse</td>
                <td>1476</td>
                <td>93.60</td>
              </tr>
              <tr valign="top">
                <td>Doctissimo Nutrition</td>
                <td>1177</td>
                <td>95.94</td>
              </tr>
              <tr valign="top">
                <td>Carenity</td>
                <td>863</td>
                <td>97.67</td>
              </tr>
              <tr valign="top">
                <td>Allo docteurs</td>
                <td>502</td>
                <td>98.67</td>
              </tr>
              <tr valign="top">
                <td>Atoute</td>
                <td>170</td>
                <td>99.01</td>
              </tr>
              <tr valign="top">
                <td>Doctissimo medicaments</td>
                <td>166</td>
                <td>99.34</td>
              </tr>
              <tr valign="top">
                <td>Que choisir</td>
                <td>85</td>
                <td>99.51</td>
              </tr>
              <tr valign="top">
                <td>Maladie rares</td>
                <td>76</td>
                <td>99.66</td>
              </tr>
              <tr valign="top">
                <td>Au feminin</td>
                <td>58</td>
                <td>99.77</td>
              </tr>
              <tr valign="top">
                <td>Sport santé</td>
                <td>50</td>
                <td>99.87</td>
              </tr>
              <tr valign="top">
                <td>Onmeda</td>
                <td>48</td>
                <td>99.97</td>
              </tr>
              <tr valign="top">
                <td>Famili</td>
                <td>7</td>
                <td>99.98</td>
              </tr>
              <tr valign="top">
                <td>Futura</td>
                <td>5</td>
                <td>99.99</td>
              </tr>
              <tr valign="top">
                <td>Maman vie</td>
                <td>2</td>
                <td>100.00</td>
              </tr>
              <tr valign="top">
                <td>Magic maman</td>
                <td>1</td>
                <td>100.00</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <table-wrap position="float" id="table7">
          <label>Table 7</label>
          <caption>
            <p>The 20 preferred terms most frequently found with Levothyrox in 11,340 posts from French web forums from January 1, 2017, to February 28, 2021.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="610"/>
            <col width="390"/>
            <thead>
              <tr valign="top">
                <td>Preferred terms</td>
                <td>Values, n</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Pain</td>
                <td>1882</td>
              </tr>
              <tr valign="top">
                <td>Tiredness</td>
                <td>1841</td>
              </tr>
              <tr valign="top">
                <td>Faintness</td>
                <td>1267</td>
              </tr>
              <tr valign="top">
                <td>Hypothyroidism</td>
                <td>1110</td>
              </tr>
              <tr valign="top">
                <td>Dizziness</td>
                <td>912</td>
              </tr>
              <tr valign="top">
                <td>Insomnia</td>
                <td>627</td>
              </tr>
              <tr valign="top">
                <td>Palpitations</td>
                <td>571</td>
              </tr>
              <tr valign="top">
                <td>Hyperthyroidism</td>
                <td>568</td>
              </tr>
              <tr valign="top">
                <td>Malignant tumor</td>
                <td>560</td>
              </tr>
              <tr valign="top">
                <td>Anxiety</td>
                <td>498</td>
              </tr>
              <tr valign="top">
                <td>Overdose</td>
                <td>490</td>
              </tr>
              <tr valign="top">
                <td>Nervous tension</td>
                <td>484</td>
              </tr>
              <tr valign="top">
                <td>Myalgia</td>
                <td>409</td>
              </tr>
              <tr valign="top">
                <td>Nausea</td>
                <td>388</td>
              </tr>
              <tr valign="top">
                <td>Stress</td>
                <td>380</td>
              </tr>
              <tr valign="top">
                <td>Diarrhea</td>
                <td>354</td>
              </tr>
              <tr valign="top">
                <td>Tachycardia</td>
                <td>322</td>
              </tr>
              <tr valign="top">
                <td>Muscle spasms</td>
                <td>321</td>
              </tr>
              <tr valign="top">
                <td>Convulsions</td>
                <td>302</td>
              </tr>
              <tr valign="top">
                <td>Arthralgia</td>
                <td>276</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <fig id="figure5" position="float">
          <label>Figure 5</label>
          <caption>
            <p>Evolution of the monthly cumulated PRR over time for the annotated couples of Levothyrox-tiredness from 11,340 French web forum posts from January 1, 2017, to February 28, 2021. PRR: proportional reporting ratio.</p>
          </caption>
          <graphic xlink:href="jmir_v26i1e46176_fig5.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>A total of 11 drugs were found to be associated with tiredness using logistic regression: paclitaxel, pegfilgrastim, Levothyrox, glatiramer acetate, escitalopram ferrous sulfate, the combination of Levothyrox and liothyronine, secukinumab, methotrexate, bismuth potassium, tetracycline, and metronidazole.</p>
        <p>Change-point analysis was conducted on the monthly evolution of the number of Levothyrox-ADE couples detected in web forums. Six breakpoints were identified (<xref rid="figure6" ref-type="fig">Figure 6</xref>), and 3 of them correlated with an increase in the number of ADEs found with Levothyrox on web forums. These increases occurred in August 2017 and in September and December 2018.</p>
        <p>This use case demonstrates that the results obtained through the pipeline, particularly in the context of Levothyrox, align with findings in the literature derived from more traditional data sources such as case reports in pharmacovigilance (see the “Discussion” section). It underscores the potential of leveraging such a pipeline to monitor a drug, not only retrospectively but also in real time using social media. Consequently, PHARES has the capability to potentially uncover new signals in pharmacovigilance.</p>
        <fig id="figure6" position="float">
          <label>Figure 6</label>
          <caption>
            <p>Breakpoints identified with the CPA method (vertical red lines) in the temporal evolution of the number of Levothyrox-ADE couples (horizontal red line) annotated in 11,340 French web forum posts from January 1, 2017, to February 28, 2021. ADE: adverse drug event; CPA: change-point analysis.</p>
          </caption>
          <graphic xlink:href="jmir_v26i1e46176_fig6.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>To align with our objective, we implemented and evaluated a pipeline that processes data from the extraction of web forum posts to the generation of indicators and alerts within a visual and interactive environment. Through this pipeline, we demonstrated that quantitative analysis can be conducted through the interface without requiring the user to code. We discovered the feasibility of acquiring information akin to the literature regarding a drug’s ADEs, as well as unexpected ADEs and significant event dates related to a drug. This underscores the relevance and utility of such a pipeline.</p>
        <p>A conceptual contribution of this research was the proposal of a methodology for designing a pipeline to facilitate pharmacovigilance studies on web forums. This involved describing 4 independent modules and outlining their interactions. Additionally, another contribution was the adaptation of certain pharmacovigilance analysis methods for the examination of data extracted from web forum posts. The logistic regression–based method presented in this article was originally tailored for pharmacovigilance cases to consider co-prescriptions of drugs. We have adapted it to suit the analysis of pharmacovigilance data extracted from web forum posts.</p>
      </sec>
      <sec>
        <title>Comparison With Prior Work</title>
        <p>The PHARES pipeline offers added value compared with previous pipelines in terms of the criteria set, which reflects an analysis of experts’ needs for routine monitoring of ADEs in social media. Unlike previous approaches, the scrapers used in PHARES routinely perform deidentification, and the inclusion of change-point analysis, the evolution of PRRs over time, and a logistic regression–based signal detection method were previously unavailable. The temporal evolution of the number of posts and a signal detection method are also seldom supported. Designed for routine usage and focused on ADEs, all posts from selected web forums are scraped and deidentified using an open-source scraper.</p>
        <p>The period and selected web forums differed between both studies: Audeh et al [<xref ref-type="bibr" rid="ref38">38</xref>] covered the period from January 2015 to December 2017, while our study spanned from January 2017 to February 2021. Additionally, Audeh et al [<xref ref-type="bibr" rid="ref38">38</xref>] included only 1 web forum specialized in thyroid issues, whereas we incorporated this specific forum along with 16 others. The main ADEs associated with Levothyrox in our study align with those found by Audeh et al [<xref ref-type="bibr" rid="ref38">38</xref>] on similar data, albeit without using the interface. In our study, the 10 most frequent symptoms were pain, tiredness, faintness, hypothyroidism, dizziness, insomnia, palpitations, hyperthyroidism, malignant tumor, and anxiety. By contrast, Audeh et al [<xref ref-type="bibr" rid="ref38">38</xref>] reported tiredness, weight gain, pain, ganglions, hot flush, chilly, inflammation, faintness, weight loss, and discomfort.</p>
        <p>Furthermore, the PHARES pipeline surpasses previous efforts, particularly regarding several criteria. These include the annotation tool, where only 4 pipelines were identified using a French annotator tool. In terms of available statistics, only 1 pipeline met both criteria we identified. Regarding signal detection, among the 3 criteria identified, 5 pipelines matched with only 1, while the remaining 5 matched with none.</p>
        <p>In the use case, a notable increase in the number of ADEs associated with Levothyrox was detected using the change-point analysis method a few months after the introduction of the new formula in March 2017, specifically in August 2017. This surge coincided with the initial declaration to the pharmacovigilance network and a petition initiated by patients to reintroduce the former formula in June 2017. We compared these findings with results from a pharmacovigilance study based on spontaneous reporting. Out of 1544 notifications spontaneously addressed by patients to the Pharmacovigilance Center of Nice from January 1, 2017, to December 31, 2017, 1372 were related to the new formula of Levothyrox, representing 7342 ADEs. Our comparison with these data clarified our findings. The 10 most frequently reported ADEs in these notifications closely resembled our own results [<xref ref-type="bibr" rid="ref37">37</xref>]. These were asthenia, headache, dizziness, hair loss, insomnia, cramps, weight gain, nausea, muscle pain, and irritability. Consequently, our results demonstrate coherence with the existing literature. This study illustrates the feasibility of identifying the date of significant events related to a drug. However, it is noteworthy that the detection of such events is not necessarily expedited through social media compared with the traditional pharmacovigilance system.</p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>The method used in our annotation process was integrated at an early stage during the pipeline’s design. Regarding the identification of drugs and symptoms, our annotation process exhibited the following performances: precision=0.926, recall=0.845, and <italic>F</italic><sub>1</sub>-score=0.886 [<xref ref-type="bibr" rid="ref20">20</xref>]. Similarly, for discerning the relationship between the drug and the ADEs, the performances were precision=0.683, recall=0.956, and <italic>F</italic><sub>1</sub>-score=0.797 [<xref ref-type="bibr" rid="ref20">20</xref>]. This study marked the inaugural publication on using NLP methods to identify ADEs in French-language web forums. The annotation process was thus developed using contemporary state-of-the-art methodologies at the time. However, it would now stand to gain from the integration of more recent NLP algorithms for named entity recognition [<xref ref-type="bibr" rid="ref8">8</xref>,<xref ref-type="bibr" rid="ref23">23</xref>,<xref ref-type="bibr" rid="ref24">24</xref>]. These newer algorithms offer comparable performances while effectively handling more complex data, thereby enhancing the efficacy of NLP analysis. However, because of our emphasis on the genericity of the approach and the interoperability between the different modules rather than solely focusing on the performance of each module, we opted not to use these algorithms. Nevertheless, contemporary state-of-the-art methods for annotating ADEs from social media posts encompass convolutional neural networks trained on top of pretrained word vectors for sentence-level classification [<xref ref-type="bibr" rid="ref24">24</xref>] and transformers using the bidirectional encoder representations from transformers (BERT) language model [<xref ref-type="bibr" rid="ref39">39</xref>]. Hussain et al [<xref ref-type="bibr" rid="ref25">25</xref>] introduced a multitask neural network based on BERT with hyperparameter optimization capable of sentence classification and named entity recognition. This model achieved performances of precision=0.840, recall=0.861, and <italic>F</italic><sub>1</sub>-score=0.896 on the Twitter (X)-TwiMed data set. Additionally, Magge et al [<xref ref-type="bibr" rid="ref8">8</xref>] presented a pipeline consisting of 3 BERT neural networks designed to classify sentences, extract named entities, and normalize those entities to their respective MedDRA concepts. The performances of this model were as follows: precision=0.82, recall=0.76, and <italic>F</italic><sub>1</sub>-score=0.78 on the SMM4H-2020 data set (Twitter/X). Thanks to our modular design, it will be straightforward to substitute our current annotation process with an enhanced model in the future.</p>
        <p>Several limitations should be acknowledged for future work. First, the scraper relies on the HTML structure of web forums, necessitating updates to its configuration files if a forum alters its page design. Additionally, our interface lacks the capability to incorporate alternate identifiers for drugs or ADEs. For instance, patients may commonly refer to the drug “baclofen” as “baclo” on social media platforms. Consequently, the number of posts pertaining to a drug or ADE could potentially be underestimated.</p>
        <p>Forums must be selected before query execution to mitigate calculation time. However, selecting forums based on the presence of information related to a particular drug or ADE can introduce bias into signal detection methods, particularly in disproportionality analysis, where the drug-ADE pair may be overrepresented. Another limitation in qualitative analysis of posts is the inability of users to edit annotations or record typical pharmacovigilance qualitative data.</p>
        <p>The assumption that all drugs mentioned in a post were consumed simultaneously by the user, as applied in the logistic regression–based method, introduces an evident bias.</p>
        <p>One limitation associated with the use of social media data pertains to fraudulent posts. The pseudonymity inherent in these platforms provides malevolent individuals with the opportunity to disseminate false rumors. Additionally, patients might post identical or similar messages across multiple discussion boards, or even multiple times on the same board. Thus, it is crucial to consider these factors to mitigate biases in signal detection.</p>
      </sec>
      <sec>
        <title>Perspectives</title>
        <p>In the short to medium term, our objectives are updating the annotation module to enhance accuracy, improving the qualitative analysis by enabling users to edit and correct annotations, and expanding the range of signal detection methods available in the statistics module.</p>
        <p>This method could indeed be beneficial for identifying potential drug misuse and unknown ADEs [<xref ref-type="bibr" rid="ref40">40</xref>]. By categorizing pathological terms found in web forums based on their presence in the summary of product characteristics, we can distinguish between indications, known ADEs, and potential instances of drug misuse or unexpected ADEs. However, it is important to note that considering all pathological terms found in the summary of product characteristics as indications might obscure cases of drug inefficiency. Therefore, a nuanced approach is necessary to ensure comprehensive and accurate analysis.</p>
        <p>We next tested our pipeline from the perspective of end users. However, the hypothesis was only partially confirmed, indicating the need for further studies. These studies should include evaluations with ergonomic criteria.</p>
        <p>In the long term, our vision is to expand this tool to encompass other languages and themes beyond pharmacovigilance. This includes areas such as drug misuse, the consumption of food supplements, and the use of illegal drugs. French web forums dedicated to recreational drug use already exist, providing a valuable source of data for such endeavors.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>Our hypothesis focused on the challenge encountered by regulatory agencies in using social media, primarily because of the lack of appropriate decision-making tools. To tackle this challenge, we devised a pipeline consisting of 4 editable modules aimed at effectively analyzing health-related French web forums for pharmacovigilance purposes. Using this pipeline and its user-friendly interface, we successfully demonstrated the feasibility of conducting quantitative analyses without the need for coding. This approach yielded coherent results and holds the potential to reveal new insights about drugs.</p>
        <p>A practical implication of our pipeline is its potential application in health surveillance by regulatory agencies such as the ANSM or pharmaceutical companies. It can be instrumental in detecting issues related to drug safety and efficacy in real time. Furthermore, research teams can leverage this tool to retrospectively analyze events and gain valuable insights into pharmacovigilance trends.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Vigi4Med Scraper structure, PHARES database structure, example of data representation, and source and evolution over time of web forum posts. PHARES: Pharmacovigilance in Social Networks.</p>
        <media xlink:href="jmir_v26i1e46176_app1.docx" xlink:title="DOCX File, 931 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">ADE</term>
          <def>
            <p>adverse drug event</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">ANSM</term>
          <def>
            <p>Agence nationale de sécurité du médicament et des produits de santé</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">ATC</term>
          <def>
            <p>Anatomical Therapeutic Chemical</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">BERT</term>
          <def>
            <p>Bidirectional Encoder Representations from Transformers</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">CSV</term>
          <def>
            <p>comma-separated values</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">CUSUM</term>
          <def>
            <p>Cumulative Sum</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">EMA</term>
          <def>
            <p>European Medicines Agency</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">FDA</term>
          <def>
            <p>Food and Drug Administration</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">FPVD</term>
          <def>
            <p>French Pharmacovigilance Database</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb10">GDPR</term>
          <def>
            <p>General Data Protection Regulation</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb11">HAS</term>
          <def>
            <p>French National Health Authority</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb12">MedDRA</term>
          <def>
            <p>Medical Dictionary for Regulatory Activities Terminology</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb13">NLP</term>
          <def>
            <p>natural language processing</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb14">PHARES</term>
          <def>
            <p>Pharmacovigilance in Social Networks</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb15">PREDOSE</term>
          <def>
            <p>Prescription Drug Abuse Online Surveillance and Epidemiology</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb16">PRR</term>
          <def>
            <p>proportional reporting ratio</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb17">PT</term>
          <def>
            <p>preferred term</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb18">RDF</term>
          <def>
            <p>resource description framework</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb19">ROR</term>
          <def>
            <p>reporting odds ratio</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb20">WEB-RADR</term>
          <def>
            <p>Recognizing Adverse Drug Reactions</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>The annotation module was developed by François Morlane-Hondère, Cyril Grouin, Pierre Zweigenbaum, and Leonardo Campillos-Llanos from the Computer Science Laboratory for Mechanics and Engineering Sciences (LIMSI). Code review for the graphical user interface in R language was performed by Stevenn Volant under a contract with the Stat4Decision company. Stat4Decision was not involved in designing the study and writing this article. This work was funded by the Agence nationale de sécurité du médicament et des produits de santé (ANSM) through Convention No. 2016S076 and was supported by a PhD contract with Sorbonne Université.</p>
    </ack>
    <notes>
      <sec>
        <title>Data Availability</title>
        <p>Our data were extracted from web forums that do not allow data sharing. Thus, as we are not the owners of the data, we cannot make the data available. The scraper we developed to extract these data is open source and can be used to extract data from web forum posts. The tool as well as full documentation (in English and French) of the code and configuration file are available online [<xref ref-type="bibr" rid="ref41">41</xref>].</p>
      </sec>
    </notes>
    <fn-group>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hazell</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Shakir</surname>
              <given-names>SAW</given-names>
            </name>
          </person-group>
          <article-title>Under-reporting of adverse drug reactions: a systematic review</article-title>
          <source>Drug Saf</source>
          <year>2006</year>
          <volume>29</volume>
          <issue>5</issue>
          <fpage>385</fpage>
          <lpage>96</lpage>
          <pub-id pub-id-type="doi">10.2165/00002018-200629050-00003</pub-id>
          <pub-id pub-id-type="medline">16689555</pub-id>
          <pub-id pub-id-type="pii">2953</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Jagannatha</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Towards drug safety surveillance and pharmacovigilance: current progress in detecting medication and adverse drug events from electronic health records</article-title>
          <source>Drug Saf</source>
          <year>2019</year>
          <month>01</month>
          <volume>42</volume>
          <issue>1</issue>
          <fpage>95</fpage>
          <lpage>97</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/30649734"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s40264-018-0766-8</pub-id>
          <pub-id pub-id-type="medline">30649734</pub-id>
          <pub-id pub-id-type="pii">10.1007/s40264-018-0766-8</pub-id>
          <pub-id pub-id-type="pmcid">PMC6842570</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Warrer</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Hansen</surname>
              <given-names>EH</given-names>
            </name>
            <name name-style="western">
              <surname>Juhl-Jensen</surname>
              <given-names>Lars</given-names>
            </name>
            <name name-style="western">
              <surname>Aagaard</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Using text-mining techniques in electronic patient records to identify ADRs from medicine use</article-title>
          <source>Br J Clin Pharmacol</source>
          <year>2012</year>
          <month>05</month>
          <volume>73</volume>
          <issue>5</issue>
          <fpage>674</fpage>
          <lpage>84</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/22122057"/>
          </comment>
          <pub-id pub-id-type="doi">10.1111/j.1365-2125.2011.04153.x</pub-id>
          <pub-id pub-id-type="medline">22122057</pub-id>
          <pub-id pub-id-type="pmcid">PMC3403195</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Black</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Tagiyeva-Milne</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Helms</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Moir</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Pharmacovigilance in children: detecting adverse drug reactions in routine electronic healthcare records. A systematic review</article-title>
          <source>Br J Clin Pharmacol</source>
          <year>2015</year>
          <month>05</month>
          <day>28</day>
          <volume>80</volume>
          <issue>4</issue>
          <fpage>844</fpage>
          <lpage>854</lpage>
          <pub-id pub-id-type="doi">10.1111/bcp.12645</pub-id>
          <pub-id pub-id-type="medline">25819310</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cameron</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Smith</surname>
              <given-names>GA</given-names>
            </name>
            <name name-style="western">
              <surname>Daniulaityte</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Sheth</surname>
              <given-names>AP</given-names>
            </name>
            <name name-style="western">
              <surname>Dave</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Anand</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Carlson</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Watkins</surname>
              <given-names>KZ</given-names>
            </name>
            <name name-style="western">
              <surname>Falck</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>PREDOSE: a semantic web platform for drug abuse epidemiology using social media</article-title>
          <source>J Biomed Inform</source>
          <year>2013</year>
          <month>12</month>
          <volume>46</volume>
          <issue>6</issue>
          <fpage>985</fpage>
          <lpage>997</lpage>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2013.07.007</pub-id>
          <pub-id pub-id-type="medline">23892295</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yeleswarapu</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Rao</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Joseph</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Saipradeep</surname>
              <given-names>VG</given-names>
            </name>
            <name name-style="western">
              <surname>Srinivasan</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>A pipeline to extract drug-adverse event pairs from multiple data sources</article-title>
          <source>BMC Med Inform Decis Mak</source>
          <year>2014</year>
          <month>02</month>
          <day>24</day>
          <volume>14</volume>
          <issue>1</issue>
          <fpage>1</fpage>
          <lpage>16</lpage>
          <pub-id pub-id-type="doi">10.1186/1472-6947-14-13</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nikfarjam</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Ransohoff</surname>
              <given-names>JD</given-names>
            </name>
            <name name-style="western">
              <surname>Callahan</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Jones</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Loew</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Kwong</surname>
              <given-names>BY</given-names>
            </name>
            <name name-style="western">
              <surname>Sarin</surname>
              <given-names>KY</given-names>
            </name>
            <name name-style="western">
              <surname>Shah</surname>
              <given-names>NH</given-names>
            </name>
          </person-group>
          <article-title>Early detection of adverse drug reactions in social health networks: a natural language processing pipeline for signal detection</article-title>
          <source>JMIR Public Health Surveill</source>
          <year>2019</year>
          <month>06</month>
          <day>03</day>
          <volume>5</volume>
          <issue>2</issue>
          <fpage>e11264</fpage>
          <pub-id pub-id-type="doi">10.2196/11264</pub-id>
          <pub-id pub-id-type="medline">31162134</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Magge</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Tutubalina</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Miftahutdinov</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Alimova</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Dirkson</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Verberne</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>DeepADEMiner: a deep learning pharmacovigilance pipeline for extraction and normalization of adverse drug event mentions on Twitter</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2021</year>
          <month>09</month>
          <day>18</day>
          <volume>28</volume>
          <issue>10</issue>
          <fpage>2184</fpage>
          <lpage>2192</lpage>
          <pub-id pub-id-type="doi">10.1093/jamia/ocab114</pub-id>
          <pub-id pub-id-type="medline">34270701</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Audeh</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Beigbeder</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Zimmermann</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Jaillon</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Bousquet</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Vigi4Med scraper: a framework for web forum structured data extraction and semantic representation</article-title>
          <source>PLoS One</source>
          <year>2017</year>
          <month>01</month>
          <day>25</day>
          <volume>12</volume>
          <issue>1</issue>
          <fpage>e0169658</fpage>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0169658</pub-id>
          <pub-id pub-id-type="medline">28122056</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Caster</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Dietrich</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Kürzinger</surname>
              <given-names>Marie-Laure</given-names>
            </name>
            <name name-style="western">
              <surname>Lerch</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Maskell</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Norén</surname>
              <given-names>G Niklas</given-names>
            </name>
            <name name-style="western">
              <surname>Tcherny-Lessenot</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Vroman</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Wisniewski</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>van Stekelenborg</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Assessment of the utility of social media for broad-ranging statistical signal detection in pharmacovigilance: results from the WEB-RADR project</article-title>
          <source>Drug Saf</source>
          <year>2018</year>
          <month>12</month>
          <volume>41</volume>
          <issue>12</issue>
          <fpage>1355</fpage>
          <lpage>1369</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/30043385"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s40264-018-0699-2</pub-id>
          <pub-id pub-id-type="medline">30043385</pub-id>
          <pub-id pub-id-type="pii">10.1007/s40264-018-0699-2</pub-id>
          <pub-id pub-id-type="pmcid">PMC6223695</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bousquet</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Audeh</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Bellet</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Lillo-Le Louët</surname>
              <given-names>Agnès</given-names>
            </name>
          </person-group>
          <article-title>Comment on "Assessment of the utility of social media for broad-ranging statistical signal detection in pharmacovigilance: results from the WEB-RADR project"</article-title>
          <source>Drug Saf</source>
          <year>2018</year>
          <month>12</month>
          <day>19</day>
          <volume>41</volume>
          <issue>12</issue>
          <fpage>1371</fpage>
          <lpage>1373</lpage>
          <pub-id pub-id-type="doi">10.1007/s40264-018-0747-y</pub-id>
          <pub-id pub-id-type="medline">30341678</pub-id>
          <pub-id pub-id-type="pii">10.1007/s40264-018-0747-y</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Karapetiantz</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Bellet</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Audeh</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Lardon</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Leprovost</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Aboukhamis</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Morlane-Hondère</surname>
              <given-names>François</given-names>
            </name>
            <name name-style="western">
              <surname>Grouin</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Burgun</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Katsahian</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Jaulent</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Beyens</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Lillo-Le Louët</surname>
              <given-names>Agnès</given-names>
            </name>
            <name name-style="western">
              <surname>Bousquet</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Descriptions of adverse drug reactions are less informative in forums than in the French pharmacovigilance database but provide more unexpected reactions</article-title>
          <source>Front Pharmacol</source>
          <year>2018</year>
          <month>05</month>
          <day>01</day>
          <volume>9</volume>
          <fpage>439</fpage>
          <lpage>11</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/29765326"/>
          </comment>
          <pub-id pub-id-type="doi">10.3389/fphar.2018.00439</pub-id>
          <pub-id pub-id-type="medline">29765326</pub-id>
          <pub-id pub-id-type="pmcid">PMC5938397</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lardon</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Abdellaoui</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Bellet</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Asfari</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Souvignet</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Texier</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Jaulent</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Beyens</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Burgun</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Bousquet</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Adverse drug reaction identification and extraction in social media: a scoping review</article-title>
          <source>J Med Internet Res</source>
          <year>2015</year>
          <month>07</month>
          <day>10</day>
          <volume>17</volume>
          <issue>7</issue>
          <fpage>e171</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2015/7/e171/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/jmir.4304</pub-id>
          <pub-id pub-id-type="medline">26163365</pub-id>
          <pub-id pub-id-type="pii">v17i7e171</pub-id>
          <pub-id pub-id-type="pmcid">PMC4526988</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Karapetiantz</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Audeh</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Faille</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Lillo-Le Louët</surname>
              <given-names>Agnès</given-names>
            </name>
            <name name-style="western">
              <surname>Bousquet</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Qualitative and quantitative analysis of web forums for adverse events detection: "strontium ranelate" case study</article-title>
          <source>Stud Health Technol Inform</source>
          <year>2019</year>
          <month>08</month>
          <day>21</day>
          <volume>264</volume>
          <fpage>964</fpage>
          <lpage>968</lpage>
          <pub-id pub-id-type="doi">10.3233/SHTI190367</pub-id>
          <pub-id pub-id-type="medline">31438067</pub-id>
          <pub-id pub-id-type="pii">SHTI190367</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Casperson</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Painter</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Dietrich</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Strategies for distributed curation of social media data for safety and pharmacovigilance</article-title>
          <year>2016</year>
          <conf-name>International Conference on Data Science (ICDATA)</conf-name>
          <conf-date>October 1, 2016</conf-date>
          <conf-loc>Barcelona, Spain</conf-loc>
          <fpage>118</fpage>
          <lpage>124</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Freifeld</surname>
              <given-names>CC</given-names>
            </name>
          </person-group>
          <article-title>Digital pharmacovigilance: the MedWatcher system for monitoring adverse events through automated processing of internet social media and crowdsourcing</article-title>
          <source>OpenBU Libraries</source>
          <year>2014</year>
          <publisher-loc>Boston, MA</publisher-loc>
          <publisher-name>Boston University</publisher-name>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://open.bu.edu/handle/2144/10995">https://open.bu.edu/handle/2144/10995</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cossin</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Lebrun</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Lobre</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Loustau</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Jouhet</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Griffier</surname>
              <given-names>Romain</given-names>
            </name>
            <name name-style="western">
              <surname>Mougin</surname>
              <given-names>Fleur</given-names>
            </name>
            <name name-style="western">
              <surname>Diallo</surname>
              <given-names>Gayo</given-names>
            </name>
            <name name-style="western">
              <surname>Thiessard</surname>
              <given-names>Frantz</given-names>
            </name>
          </person-group>
          <article-title>Romedi: an open data source about French drugs on the semantic web</article-title>
          <source>Stud Health Technol Inform</source>
          <year>2019</year>
          <month>08</month>
          <day>21</day>
          <volume>264</volume>
          <fpage>79</fpage>
          <lpage>82</lpage>
          <pub-id pub-id-type="doi">10.3233/SHTI190187</pub-id>
          <pub-id pub-id-type="medline">31437889</pub-id>
          <pub-id pub-id-type="pii">SHTI190187</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Abdellaoui</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Schück</surname>
              <given-names>Stéphane</given-names>
            </name>
            <name name-style="western">
              <surname>Texier</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Burgun</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Filtering entities to optimize identification of adverse drug reaction from social media: how can the number of words between entities in the messages help?</article-title>
          <source>JMIR Public Health Surveill</source>
          <year>2017</year>
          <month>06</month>
          <day>22</day>
          <volume>3</volume>
          <issue>2</issue>
          <fpage>e36</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://publichealth.jmir.org/2017/2/e36/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/publichealth.6577</pub-id>
          <pub-id pub-id-type="medline">28642212</pub-id>
          <pub-id pub-id-type="pii">v3i2e36</pub-id>
          <pub-id pub-id-type="pmcid">PMC5500778</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bousquet</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Dahamna</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Guillemin-Lanne</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Darmoni</surname>
              <given-names>SJ</given-names>
            </name>
            <name name-style="western">
              <surname>Faviez</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Huot</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Katsahian</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Leroux</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Pereira</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Richard</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Schück</surname>
              <given-names>Stéphane</given-names>
            </name>
            <name name-style="western">
              <surname>Souvignet</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Lillo-Le Louët</surname>
              <given-names>Agnès</given-names>
            </name>
            <name name-style="western">
              <surname>Texier</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>The adverse drug reactions from patient reports in social media project: five major challenges to overcome to operationalize analysis and efficiently support pharmacovigilance process</article-title>
          <source>JMIR Res Protoc</source>
          <year>2017</year>
          <month>09</month>
          <day>21</day>
          <volume>6</volume>
          <issue>9</issue>
          <fpage>e179</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.researchprotocols.org/2017/9/e179/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/resprot.6463</pub-id>
          <pub-id pub-id-type="medline">28935617</pub-id>
          <pub-id pub-id-type="pii">v6i9e179</pub-id>
          <pub-id pub-id-type="pmcid">PMC5629348</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Morlane-Hondère</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Grouin</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Zweigenbaum</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Identification of drug-related medical conditions in social media</article-title>
          <year>2016</year>
          <month>05</month>
          <conf-name>The Tenth International Conference on Language Resources and Evaluation (LREC'16)</conf-name>
          <conf-date>May 2, 2016</conf-date>
          <conf-loc>Portoroz, Slovenia</conf-loc>
          <fpage>2022</fpage>
          <lpage>2028</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lafferty</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>McCallum</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Pereira</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Conditional random fields: probabilistic models for segmenting and labeling sequence data</article-title>
          <year>2001</year>
          <month>06</month>
          <day>28</day>
          <conf-name>Eighteenth International Conference on Machine Learning (ICML 2001)</conf-name>
          <conf-date>June 28, 2001 to July 1, 2001</conf-date>
          <conf-loc>Williamstown, MA</conf-loc>
          <publisher-loc>San Francisco, CA</publisher-loc>
          <publisher-name>Morgan Kaufmann Publishers</publisher-name>
          <fpage>282</fpage>
          <lpage>289</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Xia</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Historical profile will tell? A deep learning-based multi-level embedding framework for adverse drug event detection and extraction</article-title>
          <source>Decision Support Systems</source>
          <year>2022</year>
          <month>09</month>
          <volume>160</volume>
          <fpage>113832</fpage>
          <pub-id pub-id-type="doi">10.1016/j.dss.2022.113832</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Vydiswaran</surname>
              <given-names>VGV</given-names>
            </name>
          </person-group>
          <article-title>An assessment of mentions of adverse drug events on social media with natural language processing: model development and analysis</article-title>
          <source>JMIR Med Inform</source>
          <year>2022</year>
          <month>09</month>
          <day>28</day>
          <volume>10</volume>
          <issue>9</issue>
          <fpage>e38140</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medinform.jmir.org/2022/9/e38140/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/38140</pub-id>
          <pub-id pub-id-type="medline">36170004</pub-id>
          <pub-id pub-id-type="pii">v10i9e38140</pub-id>
          <pub-id pub-id-type="pmcid">PMC9557755</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rezaei</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Ebrahimpour-Komleh</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Eslami</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Chavoshinejad</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Totonchi</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Adverse drug reaction detection in social media by deep learning methods</article-title>
          <source>Cell J</source>
          <year>2020</year>
          <month>10</month>
          <volume>22</volume>
          <issue>3</issue>
          <fpage>319</fpage>
          <lpage>324</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/31863657"/>
          </comment>
          <pub-id pub-id-type="doi">10.22074/cellj.2020.6615</pub-id>
          <pub-id pub-id-type="medline">31863657</pub-id>
          <pub-id pub-id-type="pmcid">PMC6947008</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hussain</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Afzal</surname>
              <given-names>Hammad</given-names>
            </name>
            <name name-style="western">
              <surname>Saeed</surname>
              <given-names>Ramsha</given-names>
            </name>
            <name name-style="western">
              <surname>Iltaf</surname>
              <given-names>Naima</given-names>
            </name>
            <name name-style="western">
              <surname>Umair</surname>
              <given-names>Mir Yasir</given-names>
            </name>
          </person-group>
          <article-title>Pharmacovigilance with transformers: a framework to detect adverse drug reactions using BERT fine-tuned with FARM</article-title>
          <source>Comput Math Methods Med</source>
          <year>2021</year>
          <volume>2021</volume>
          <fpage>5589829</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1155/2021/5589829"/>
          </comment>
          <pub-id pub-id-type="doi">10.1155/2021/5589829</pub-id>
          <pub-id pub-id-type="medline">34422092</pub-id>
          <pub-id pub-id-type="pmcid">PMC8378963</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ding</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Lei</surname>
              <given-names>Z</given-names>
            </name>
          </person-group>
          <article-title>An attentive neural sequence labeling model for adverse drug reactions mentions extraction</article-title>
          <source>IEEE Access</source>
          <year>2018</year>
          <volume>6</volume>
          <fpage>73305</fpage>
          <lpage>73315</lpage>
          <pub-id pub-id-type="doi">10.1109/access.2018.2882443</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Kass-Hout</surname>
              <given-names>Taha</given-names>
            </name>
            <name name-style="western">
              <surname>Anderson-Smits</surname>
              <given-names>Colin</given-names>
            </name>
            <name name-style="western">
              <surname>Gray</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Signal detection using change point analysis in postmarket surveillance</article-title>
          <source>Pharmacoepidemiol Drug Saf</source>
          <year>2015</year>
          <month>06</month>
          <day>22</day>
          <volume>24</volume>
          <issue>6</issue>
          <fpage>663</fpage>
          <lpage>668</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/25903221"/>
          </comment>
          <pub-id pub-id-type="doi">10.1002/pds.3783</pub-id>
          <pub-id pub-id-type="medline">25903221</pub-id>
          <pub-id pub-id-type="pmcid">PMC4690504</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Evans</surname>
              <given-names>SJW</given-names>
            </name>
            <name name-style="western">
              <surname>Waller</surname>
              <given-names>PC</given-names>
            </name>
            <name name-style="western">
              <surname>Davis</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Use of proportional reporting ratios (PRRs) for signal generation from spontaneous adverse drug reaction reports</article-title>
          <source>Pharmacoepidemiol Drug Saf</source>
          <year>2001</year>
          <month>12</month>
          <day>10</day>
          <volume>10</volume>
          <issue>6</issue>
          <fpage>483</fpage>
          <lpage>486</lpage>
          <pub-id pub-id-type="doi">10.1002/pds.677</pub-id>
          <pub-id pub-id-type="medline">11828828</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>van Puijenbroek</surname>
              <given-names>EP</given-names>
            </name>
            <name name-style="western">
              <surname>Bate</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Leufkens</surname>
              <given-names>HGM</given-names>
            </name>
            <name name-style="western">
              <surname>Lindquist</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Orre</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Egberts</surname>
              <given-names>ACG</given-names>
            </name>
          </person-group>
          <article-title>A comparison of measures of disproportionality for signal detection in spontaneous reporting systems for adverse drug reactions</article-title>
          <source>Pharmacoepidemiol Drug Saf</source>
          <year>2002</year>
          <month>02</month>
          <day>06</day>
          <volume>11</volume>
          <issue>1</issue>
          <fpage>3</fpage>
          <lpage>10</lpage>
          <pub-id pub-id-type="doi">10.1002/pds.668</pub-id>
          <pub-id pub-id-type="medline">11998548</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ahmed</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Pariente</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Tubert-Bitter</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Class-imbalanced subsampling lasso algorithm for discovering adverse drug reactions</article-title>
          <source>Stat Methods Med Res</source>
          <year>2018</year>
          <month>03</month>
          <day>25</day>
          <volume>27</volume>
          <issue>3</issue>
          <fpage>785</fpage>
          <lpage>797</lpage>
          <pub-id pub-id-type="doi">10.1177/0962280216643116</pub-id>
          <pub-id pub-id-type="medline">27114328</pub-id>
          <pub-id pub-id-type="pii">0962280216643116</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Caster</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Norén</surname>
              <given-names>GN</given-names>
            </name>
            <name name-style="western">
              <surname>Madigan</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Bate</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Large-scale regression-based pattern discovery: the example of screening the WHO global drug safety database</article-title>
          <source>Stat Anal Data Min</source>
          <year>2010</year>
          <month>07</month>
          <day>20</day>
          <volume>3</volume>
          <issue>4</issue>
          <fpage>197</fpage>
          <lpage>208</lpage>
          <pub-id pub-id-type="doi">10.1002/sam.10078</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Harpaz</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>DuMouchel</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>LePendu</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Bauer-Mehren</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Ryan</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Shah</surname>
              <given-names>NH</given-names>
            </name>
          </person-group>
          <article-title>Performance of pharmacovigilance signal-detection algorithms for the FDA adverse event reporting system</article-title>
          <source>Clin Pharmacol Ther</source>
          <year>2013</year>
          <month>06</month>
          <day>11</day>
          <volume>93</volume>
          <issue>6</issue>
          <fpage>539</fpage>
          <lpage>546</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/23571771"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/clpt.2013.24</pub-id>
          <pub-id pub-id-type="medline">23571771</pub-id>
          <pub-id pub-id-type="pii">clpt201324</pub-id>
          <pub-id pub-id-type="pmcid">PMC3857139</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <collab>R Core Team</collab>
          </person-group>
          <article-title>The R Project for Statistical Computing</article-title>
          <source>R Foundation</source>
          <access-date>2024-04-26</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.R-project.org/">http://www.R-project.org/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Cheng</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Allaire</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Sievert</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Schloerke</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Xie</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>shiny: web application framework for R</article-title>
          <source>Comprehensive R Archive Network</source>
          <access-date>2023-01-30</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://CRAN.R-project.org/package=shiny">https://CRAN.R-project.org/package=shiny</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="web">
          <article-title>Regulation (EU) 2016/679 of the European Parliament and of the Council of 27 April 2016 on the protection of natural persons with regard to the processing of personal data and on the free movement of such data, and repealing Directive 95/46/EC (General Data Protection Regulation) (Text with EEA relevance)</article-title>
          <source>EUR-Lex</source>
          <access-date>2024-04-26</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://eur-lex.europa.eu/legal-content/en/TXT/?uri=CELEX:32016R0679">https://eur-lex.europa.eu/legal-content/en/TXT/?uri=CELEX:32016R0679</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="web">
          <article-title>SHA-1</article-title>
          <source>Wikipedia</source>
          <year>2023</year>
          <access-date>2023-01-30</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://en.wikipedia.org/w/index.php?title=SHA-1&#38;oldid=1135933131">https://en.wikipedia.org/w/index.php?title=SHA-1&#38;oldid=1135933131</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Viard</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Parassol-Girard</surname>
              <given-names>Nadège</given-names>
            </name>
            <name name-style="western">
              <surname>Romani</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Van Obberghen</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Rocher</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Berriri</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Drici</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Spontaneous adverse event notifications by patients subsequent to the marketing of a new formulation of Levothyrox amidst a drug media crisis: atypical profile as compared with other drugs</article-title>
          <source>Fundam Clin Pharmacol</source>
          <year>2019</year>
          <month>08</month>
          <day>07</day>
          <volume>33</volume>
          <issue>4</issue>
          <fpage>463</fpage>
          <lpage>470</lpage>
          <pub-id pub-id-type="doi">10.1111/fcp.12446</pub-id>
          <pub-id pub-id-type="medline">30575110</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Audeh</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Grouin</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Zweigenbaum</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Bousquet</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Jaulent</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Benkhebil</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>French Levothyrox® crisis: retrospective analysis of social media</article-title>
          <year>2019</year>
          <month>10</month>
          <day>26</day>
          <conf-name>Conference ISOP - International Society of Pharmacovigilance</conf-name>
          <conf-date>October 1, 2019</conf-date>
          <conf-loc>Bogota, Colombia</conf-loc>
          <publisher-loc>Bogota, Colombia</publisher-loc>
          <publisher-name>Springer International Publishing</publisher-name>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://hal.archives-ouvertes.fr/hal-02411632"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Devlin</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Toutanova</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>BERT: Pre-training of deep bidirectional transformers for language understanding</article-title>
          <source>Proceedings of NAACL-HLT 2019</source>
          <year>2019</year>
          <month>06</month>
          <day>07</day>
          <conf-name>Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies</conf-name>
          <conf-date>June 2-7, 2019</conf-date>
          <conf-loc>Minneapolis, MN</conf-loc>
          <fpage>4171</fpage>
          <lpage>4186</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/N19-1423.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Campillos-Llanos</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Grouin</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Lillo-Le Louët</surname>
              <given-names>Agnès</given-names>
            </name>
            <name name-style="western">
              <surname>Zweigenbaum</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Initial experiments for pharmacovigilance analysis in social media using summaries of product characteristics</article-title>
          <source>Stud Health Technol Inform</source>
          <year>2019</year>
          <month>08</month>
          <day>21</day>
          <volume>264</volume>
          <fpage>60</fpage>
          <lpage>64</lpage>
          <pub-id pub-id-type="doi">10.3233/SHTI190183</pub-id>
          <pub-id pub-id-type="medline">31437885</pub-id>
          <pub-id pub-id-type="pii">SHTI190183</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="web">
          <article-title>Vigi4Med Scraper</article-title>
          <source>GitHub</source>
          <access-date>2024-04-22</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://github.com/bissana/Vigi4Med-Scraper">https://github.com/bissana/Vigi4Med-Scraper</ext-link>
          </comment>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
