<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article article-type="letter" dtd-version="2.0" xmlns:xlink="http://www.w3.org/1999/xlink">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMIR</journal-id>
      <journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id>
      <journal-title>Journal of Medical Internet Research</journal-title>
      <issn pub-type="epub">1438-8871</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v26i1e50652</article-id>
      <article-id pub-id-type="pmid">38526542</article-id>
      <article-id pub-id-type="doi">10.2196/50652</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Research Letter</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Research Letter</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Using Longitudinal Twitter Data for Digital Epidemiology of Childhood Health Outcomes: An Annotated Data Set and Deep Neural Network Classifiers</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>de Azevedo Cardoso</surname>
            <given-names>Taiane</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Ni</surname>
            <given-names>Congning</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Guo</surname>
            <given-names>Eddie</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author">
          <name name-style="western">
            <surname>Klein</surname>
            <given-names>Ari Z</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-8281-3464</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Gutiérrez Gómez</surname>
            <given-names>José Agustín</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0006-9394-8352</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Levine</surname>
            <given-names>Lisa D</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-6811-7980</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Gonzalez-Hernandez</surname>
            <given-names>Graciela</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff4" ref-type="aff">4</xref>
          <address>
            <institution>Department of Computational Biomedicine</institution>
            <institution>Cedars-Sinai Medical Center</institution>
            <addr-line>Pacific Design Center, Ste G549F</addr-line>
            <addr-line>700 N San Vicente Blvd</addr-line>
            <addr-line>West Hollywood, CA, 90069</addr-line>
            <country>United States</country>
            <phone>1 310 423 3521</phone>
            <email>Graciela.GonzalezHernandez@csmc.edu</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-6416-9556</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Department of Biostatistics, Epidemiology, and Informatics</institution>
        <institution>Perelman School of Medicine</institution>
        <institution>University of Pennsylvania</institution>
        <addr-line>Philadelphia, PA</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Department of Health Sciences</institution>
        <institution>University of Monterrey</institution>
        <addr-line>San Pedro Garza García, Nuevo León</addr-line>
        <country>Mexico</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Department of Obstetrics and Gynecology</institution>
        <institution>Perelman School of Medicine</institution>
        <institution>University of Pennsylvania</institution>
        <addr-line>Philadelphia, PA</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff4">
        <label>4</label>
        <institution>Department of Computational Biomedicine</institution>
        <institution>Cedars-Sinai Medical Center</institution>
        <addr-line>West Hollywood, CA</addr-line>
        <country>United States</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Graciela Gonzalez-Hernandez <email>Graciela.GonzalezHernandez@csmc.edu</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <year>2024</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>25</day>
        <month>3</month>
        <year>2024</year>
      </pub-date>
      <volume>26</volume>
      <elocation-id>e50652</elocation-id>
      <history>
        <date date-type="received">
          <day>7</day>
          <month>7</month>
          <year>2023</year>
        </date>
        <date date-type="rev-request">
          <day>22</day>
          <month>8</month>
          <year>2023</year>
        </date>
        <date date-type="rev-recd">
          <day>5</day>
          <month>9</month>
          <year>2023</year>
        </date>
        <date date-type="accepted">
          <day>19</day>
          <month>9</month>
          <year>2023</year>
        </date>
      </history>
      <copyright-statement>©Ari Z Klein, José Agustín Gutiérrez Gómez, Lisa D Levine, Graciela Gonzalez-Hernandez. Originally published in the Journal of Medical Internet Research (https://www.jmir.org), 25.03.2024.</copyright-statement>
      <copyright-year>2024</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research, is properly cited. The complete bibliographic information, a link to the original publication on https://www.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://www.jmir.org/2024/1/e50652" xlink:type="simple"/>
      <abstract>
        <p>We manually annotated 9734 tweets that were posted by users who reported their pregnancy on Twitter, and used them to train, evaluate, and deploy deep neural network classifiers (<italic>F</italic><sub>1</sub>-score=0.93) to detect tweets that report having a child with attention-deficit/hyperactivity disorder (678 users), autism spectrum disorders (1744 users), delayed speech (902 users), or asthma (1255 users), demonstrating the potential of Twitter as a complementary resource for assessing associations between pregnancy exposures and childhood health outcomes on a large scale.</p>
      </abstract>
      <kwd-group>
        <kwd>natural language processing</kwd>
        <kwd>machine learning</kwd>
        <kwd>data mining</kwd>
        <kwd>social media</kwd>
        <kwd>Twitter</kwd>
        <kwd>pregnancy</kwd>
        <kwd>epidemiology</kwd>
        <kwd>developmental disabilities</kwd>
        <kwd>asthma</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>Many children are diagnosed with disorders that can impact their daily lives and can last throughout their lifetime. For example, in the United States, 17% of children are diagnosed with a developmental disability [<xref ref-type="bibr" rid="ref1">1</xref>] and 8% with asthma [<xref ref-type="bibr" rid="ref2">2</xref>]. Meanwhile, data sources for assessing the association of these outcomes with pregnancy exposures are limited, as pregnancy registries typically follow infants for up to 1 year after birth. While our previous work [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref4">4</xref>] demonstrated the utility of Twitter as a source of data regarding pregnancy outcomes, the ability to continue collecting users’ tweets on an ongoing basis after birth may present opportunities to detect outcomes in childhood. Twitter data have been used to identify self-reports of attention-deficit/hyperactivity disorder (ADHD) [<xref ref-type="bibr" rid="ref5">5</xref>], autism spectrum disorder (ASD) [<xref ref-type="bibr" rid="ref6">6</xref>], and asthma [<xref ref-type="bibr" rid="ref7">7</xref>], but not to identify reports of these disorders in users’ children. This study aimed to assess whether there are users who report having a child with ADHD, ASD, delayed speech, or asthma, and to develop and evaluate an automated method for identifying these reports.</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Ethical Considerations</title>
        <p>The study data were collected and analyzed in accordance with the Twitter Terms of Service. The institutional review boards of the University of Pennsylvania and Cedars-Sinai Medical Center deemed this study exempt.</p>
      </sec>
      <sec>
        <title>Data Collection</title>
        <p>We searched for mentions of ADHD, ASD, delayed speech, and asthma among all the tweets posted by more than 100,000 users who reported their pregnancy on Twitter [<xref ref-type="bibr" rid="ref8">8</xref>]. We then searched these matching tweets for references to a child and the user, and excluded tweets that matched specific patterns indicating the user’s own disorder. The query (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>) returned 36,094 tweets (excluding retweets) posted by 11,712 users.</p>
      </sec>
      <sec>
        <title>Annotation</title>
        <p>We used 400 matching tweets—100 per outcome—to develop annotation guidelines (<xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>) for distinguishing those that report having a child with a disorder from those that do not. An additional 9334 tweets—1 random tweet per user—were then independently annotated: 8334 by 2 annotators and 1000 by all 3. Interannotator agreement (Fleiss kappa) was 0.88. After resolving disagreements among all 9734 tweets, we determined that 3019 (31%) reported having a child with a disorder and 6715 (69%) did not.</p>
      </sec>
      <sec>
        <title>Automatic Classification</title>
        <p>We split the 9734 tweets into 80% (n=7787) training (<xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref>) and 20% (n=1947) test data, and performed machine learning experiments using deep neural network classifiers based on bidirectional encoder representations from transformers (BERT) [<xref ref-type="bibr" rid="ref9">9</xref>]: the BERT-Base-Uncased, RoBERTa-Large, and BERTweet-Large pretrained models in the <italic>Huggingface</italic> library. Our preprocessing included normalizing URLs and usernames, and lowercasing the tweets. For training, we used Adam optimization, 5 epochs, a batch size of 8, and a learning rate of 0.00001, based on evaluating after each epoch using a 5% split of the training set. We fine-tuned all layers of the models with our annotated tweets.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <p><xref ref-type="table" rid="table1">Table 1</xref> presents the performance of the classifiers. The RoBERTa-Large [<xref ref-type="bibr" rid="ref10">10</xref>] classifier achieved the highest overall <italic>F</italic><sub>1</sub>-score (0.93). <xref ref-type="table" rid="table1">Table 1</xref> also presents the performance of the RoBERTa-Large classifier for tweets that mention specific outcomes. We deployed the RoBERTa-Large classifier on the additional 26,360 unlabeled tweets that matched our query (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). Between the 9734 manually annotated tweets and the 26,360 automatically classified tweets, we identified 3806 total users who reported having a child with ADHD (n=678), ASD (n=1744), delayed speech (n=902), or asthma (n=1255).</p>
      <p><xref ref-type="table" rid="table2">Table 2</xref> presents examples of tweets in the test set that were misclassified by the RoBERTa-Large classifier. While 28 (58%) of the 48 false positives do refer to the user’s child, 11 (39%) of these 28 indicate that someone other than the user’s child has a disorder (tweet 1), and 9 (32%) indicate that a disorder is merely suspected or exhibited (tweet 2). Among the other 20 (42%) of the 48 false positives, 10 (50%) are reported speech, such as quotations (tweet 3). Among the 42 false negatives, 22 (52%) do not explicitly mention the user’s child (tweet 4)—for example, referring to the child only by a pronoun or name—and 14 (33%) do not explicitly indicate that the child has a disorder (tweet 5).</p>
      <table-wrap position="float" id="table1">
        <label>Table 1</label>
        <caption>
          <p>Precision, recall, and <italic>F</italic><sub>1</sub>-score of classifiers for the class of tweets that report having a child with attention-deficit/hyperactivity disorder (ADHD), autism spectrum disorder (ASD), delayed speech, or asthma, including the outcome-specific precision, recall, and <italic>F</italic><sub>1</sub>-score for the RoBERTa-Large classifier.</p>
        </caption>
        <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
          <col width="30"/>
          <col width="270"/>
          <col width="230"/>
          <col width="230"/>
          <col width="240"/>
          <thead>
            <tr valign="top">
              <td colspan="2">Classifier</td>
              <td>Precision</td>
              <td>Recall</td>
              <td><italic>F</italic><sub>1</sub>-score</td>
            </tr>
          </thead>
          <tbody>
            <tr valign="top">
              <td colspan="2">BERT-Base-Uncased</td>
              <td>0.83</td>
              <td>0.87</td>
              <td>0.85</td>
            </tr>
            <tr valign="top">
              <td colspan="2">BERTweet-Large</td>
              <td>0.89</td>
              <td>0.94</td>
              <td>0.92</td>
            </tr>
            <tr valign="top">
              <td colspan="2">
                <bold>RoBERTa-Large</bold>
              </td>
              <td>0.92</td>
              <td>0.94</td>
              <td>0.93</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>ADHD</td>
              <td>0.91</td>
              <td>0.85</td>
              <td>0.88</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>ASD</td>
              <td>0.94</td>
              <td>0.92</td>
              <td>0.93</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Delayed speech</td>
              <td>0.94</td>
              <td>0.96</td>
              <td>0.95</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Asthma</td>
              <td>0.91</td>
              <td>0.96</td>
              <td>0.94</td>
            </tr>
          </tbody>
        </table>
      </table-wrap>
      <table-wrap position="float" id="table2">
        <label>Table 2</label>
        <caption>
          <p>Sample false positives and false negatives of the RoBERTa-Large classifier for detecting tweets that report having a child with attention-deficit/hyperactivity disorder (ADHD), autism spectrum disorder (ASD), delayed speech, or asthma (with the text that matched the data collection query in italics).</p>
        </caption>
        <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
          <col width="90"/>
          <col width="710"/>
          <col width="90"/>
          <col width="110"/>
          <thead>
            <tr valign="top">
              <td>Tweet number</td>
              <td>Tweet</td>
              <td>Actual</td>
              <td>Predicted</td>
            </tr>
          </thead>
          <tbody>
            <tr valign="top">
              <td>1</td>
              <td>So Maxine Waters can be maskless on a plane but <italic>I</italic> can’t fly with <italic>my 2 year old</italic> cause she won’t wear a mask? <italic>Kids</italic> with <italic>autism</italic> are being banned from flying because they won’t wear a mask?</td>
              <td>–</td>
              <td>+</td>
            </tr>
            <tr valign="top">
              <td>2</td>
              <td>they treat <italic>my baby</italic> with <italic>asthma</italic> meds all the time but didn’t diagnose her with it im pretty sure she has it tho</td>
              <td>–</td>
              <td>+</td>
            </tr>
            <tr valign="top">
              <td>3</td>
              <td>Any tips for this mum: “<italic>My daughter</italic> is 10. <italic>My</italic> parents would like to gift her either a phone or a smart watch which is easy to use and won’t be easily damaged by a very active <italic>ADHD kid</italic>... <italic>I</italic> need help choosi… [URL]</td>
              <td>–</td>
              <td>+</td>
            </tr>
            <tr valign="top">
              <td>4</td>
              <td>Flying tomorrow...during a pandemic with a <italic>nonverbal 3 year old</italic>. <italic>We</italic> could use some prayers, please.<inline-graphic xlink:href="jmir_v26i1e50652_fig1.png" xlink:type="simple" mimetype="image"/></td>
              <td>+</td>
              <td>–</td>
            </tr>
            <tr valign="top">
              <td>5</td>
              <td><italic>I</italic> wouldn’t change <italic>my child</italic> for anything in the world. <italic>I</italic>’m just curious to know where <italic>autism</italic> came from because <italic>me</italic> and his dad don’t have any family members that are <italic>autistic</italic>. It’s just a question out of curiosity</td>
              <td>+</td>
              <td>–</td>
            </tr>
          </tbody>
        </table>
      </table-wrap>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <p>Our ability to identify Twitter data during pregnancy for users who reported having a child with ADHD, ASD, delayed speech, or asthma suggests that Twitter could be a complementary resource for assessing associations between pregnancy exposures and childhood health outcomes, with potential clinical implications for informing prenatal care. The overall and outcome-specific performance for automatically identifying these outcomes demonstrates the feasibility of using Twitter data for observational studies on a large scale.</p>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Data collection query.</p>
        <media xlink:href="jmir_v26i1e50652_app1.txt" xlink:title="TXT File, 11 KB"/>
      </supplementary-material>
      <supplementary-material id="app2">
        <label>Multimedia Appendix 2</label>
        <p>Annotation guidelines.</p>
        <media xlink:href="jmir_v26i1e50652_app2.docx" xlink:title="DOCX File, 28 KB"/>
      </supplementary-material>
      <supplementary-material id="app3">
        <label>Multimedia Appendix 3</label>
        <p>Training data.</p>
        <media xlink:href="jmir_v26i1e50652_app3.txt" xlink:title="TXT File, 165 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">ADHD</term>
          <def>
            <p>attention-deficit/hyperactivity disorder</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">ASD</term>
          <def>
            <p>autism spectrum disorder</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">BERT</term>
          <def>
            <p>bidirectional encoder representations from transformers</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>This work was supported by the National Library of Medicine (R01LM011176). The content is solely the responsibility of the authors and does not necessarily represent the official views of the National Institutes of Health. The authors thank Ivan Flores for contributing to software applications and Karen O’Connor for contributing to annotating the Twitter data. Generative artificial intelligence was not used for any portion of the study or manuscript writing.</p>
    </ack>
    <notes>
      <sec>
        <title>Data Availability</title>
        <p>The manually annotated training data are included with this article in <xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref>. In accordance with the Twitter Terms of Service, these tweets are made available as tweet IDs, which can be rehydrated as tweet objects if they remain public at the time they are requested through the Twitter API.</p>
      </sec>
    </notes>
    <fn-group>
      <fn fn-type="con">
        <p>AZK and JAGG contributed to the data collection, annotation, machine learning experiments, error analysis, and drafting of the manuscript. LDL provided guidance on pregnancy outcomes and edited the manuscript. GGH designed and guided the study and edited the manuscript.</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zablotsky</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Black</surname>
              <given-names>LI</given-names>
            </name>
            <name name-style="western">
              <surname>Maenner</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Schieve</surname>
              <given-names>LA</given-names>
            </name>
            <name name-style="western">
              <surname>Danielson</surname>
              <given-names>ML</given-names>
            </name>
            <name name-style="western">
              <surname>Bitsko</surname>
              <given-names>RH</given-names>
            </name>
            <name name-style="western">
              <surname>Blumberg</surname>
              <given-names>SJ</given-names>
            </name>
            <name name-style="western">
              <surname>Kogan</surname>
              <given-names>MD</given-names>
            </name>
            <name name-style="western">
              <surname>Boyle</surname>
              <given-names>CA</given-names>
            </name>
          </person-group>
          <article-title>Prevalence and trends of developmental disabilities among children in the United States: 2009-2017</article-title>
          <source>Pediatrics</source>
          <year>2019</year>
          <month>10</month>
          <volume>144</volume>
          <issue>4</issue>
          <fpage>e20190811</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/31558576"/>
          </comment>
          <pub-id pub-id-type="doi">10.1542/peds.2019-0811</pub-id>
          <pub-id pub-id-type="medline">31558576</pub-id>
          <pub-id pub-id-type="pii">peds.2019-0811</pub-id>
          <pub-id pub-id-type="pmcid">PMC7076808</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zahran</surname>
              <given-names>HS</given-names>
            </name>
            <name name-style="western">
              <surname>Bailey</surname>
              <given-names>CM</given-names>
            </name>
            <name name-style="western">
              <surname>Damon</surname>
              <given-names>SA</given-names>
            </name>
            <name name-style="western">
              <surname>Garbe</surname>
              <given-names>PL</given-names>
            </name>
            <name name-style="western">
              <surname>Breysse</surname>
              <given-names>PN</given-names>
            </name>
          </person-group>
          <article-title>Vital signs: asthma in children - United States, 2001-2016</article-title>
          <source>MMWR Morb Mortal Wkly Rep</source>
          <year>2018</year>
          <month>02</month>
          <day>09</day>
          <volume>67</volume>
          <issue>5</issue>
          <fpage>149</fpage>
          <lpage>155</lpage>
          <pub-id pub-id-type="doi">10.15585/mmwr.mm6705e1</pub-id>
          <pub-id pub-id-type="medline">29420459</pub-id>
          <pub-id pub-id-type="pmcid">PMC5812476</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Golder</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Chiuve</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Weissenbacher</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Klein</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>O'Connor</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Bland</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Malin</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Bhattacharya</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Scarazzini</surname>
              <given-names>LJ</given-names>
            </name>
            <name name-style="western">
              <surname>Gonzalez-Hernandez</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Pharmacoepidemiologic evaluation of birth defects from health-related postings in social media during pregnancy</article-title>
          <source>Drug Saf</source>
          <year>2019</year>
          <month>03</month>
          <day>3</day>
          <volume>42</volume>
          <issue>3</issue>
          <fpage>389</fpage>
          <lpage>400</lpage>
          <pub-id pub-id-type="doi">10.1007/s40264-018-0731-6</pub-id>
          <pub-id pub-id-type="medline">30284214</pub-id>
          <pub-id pub-id-type="pmcid">PMC6426821</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Klein</surname>
              <given-names>AZ</given-names>
            </name>
            <name name-style="western">
              <surname>O'Connor</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Levine</surname>
              <given-names>LD</given-names>
            </name>
            <name name-style="western">
              <surname>Gonzalez-Hernandez</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Using Twitter data for cohort studies of drug safety in pregnancy: proof-of-concept with β-blockers</article-title>
          <source>JMIR Form Res</source>
          <year>2022</year>
          <month>06</month>
          <day>30</day>
          <volume>6</volume>
          <issue>6</issue>
          <fpage>e36771</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://formative.jmir.org/2022/6/e36771/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/36771</pub-id>
          <pub-id pub-id-type="medline">35771614</pub-id>
          <pub-id pub-id-type="pii">v6i6e36771</pub-id>
          <pub-id pub-id-type="pmcid">PMC9284350</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Guntuku</surname>
              <given-names>SC</given-names>
            </name>
            <name name-style="western">
              <surname>Ramsay</surname>
              <given-names>JR</given-names>
            </name>
            <name name-style="western">
              <surname>Merchant</surname>
              <given-names>RM</given-names>
            </name>
            <name name-style="western">
              <surname>Ungar</surname>
              <given-names>LH</given-names>
            </name>
          </person-group>
          <article-title>Language of ADHD in adults on social media</article-title>
          <source>J Atten Disord</source>
          <year>2019</year>
          <month>10</month>
          <day>08</day>
          <volume>23</volume>
          <issue>12</issue>
          <fpage>1475</fpage>
          <lpage>1485</lpage>
          <pub-id pub-id-type="doi">10.1177/1087054717738083</pub-id>
          <pub-id pub-id-type="medline">29115168</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hswen</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Gopaluni</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Brownstein</surname>
              <given-names>JS</given-names>
            </name>
            <name name-style="western">
              <surname>Hawkins</surname>
              <given-names>JB</given-names>
            </name>
          </person-group>
          <article-title>Using Twitter to detect psychological characteristics of self-identified persons with autism spectrum disorder: a feasibility study</article-title>
          <source>JMIR Mhealth Uhealth</source>
          <year>2019</year>
          <month>02</month>
          <day>12</day>
          <volume>7</volume>
          <issue>2</issue>
          <fpage>e12264</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://mhealth.jmir.org/2019/2/e12264/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/12264</pub-id>
          <pub-id pub-id-type="medline">30747718</pub-id>
          <pub-id pub-id-type="pii">v7i2e12264</pub-id>
          <pub-id pub-id-type="pmcid">PMC6390184</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Edo-Osagie</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Smith</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Lake</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Edeghere</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>De La Iglesia</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Twitter mining using semi-supervised classification for relevance filtering in syndromic surveillance</article-title>
          <source>PLoS One</source>
          <year>2019</year>
          <volume>14</volume>
          <issue>7</issue>
          <fpage>e0210689</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pone.0210689"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0210689</pub-id>
          <pub-id pub-id-type="medline">31318885</pub-id>
          <pub-id pub-id-type="pii">PONE-D-18-34849</pub-id>
          <pub-id pub-id-type="pmcid">PMC6638773</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Klein</surname>
              <given-names>AZ</given-names>
            </name>
            <name name-style="western">
              <surname>Kunatharaju</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>O'Connor</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Gonzalez-Hernandez</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Pregex: rule-based detection and extraction of Twitter data in pregnancy</article-title>
          <source>J Med Internet Res</source>
          <year>2023</year>
          <month>02</month>
          <day>09</day>
          <volume>25</volume>
          <fpage>e40569</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2023/1/e40569/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/40569</pub-id>
          <pub-id pub-id-type="medline">36757756</pub-id>
          <pub-id pub-id-type="pii">v25i1e40569</pub-id>
          <pub-id pub-id-type="pmcid">PMC9951068</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Devlin</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Cheng</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Toutanova</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>BERT: pre-training of deep bidirectional transformers for language understanding</article-title>
          <year>2019</year>
          <conf-name>17th Annual Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies</conf-name>
          <conf-date>June 2-7, 2019</conf-date>
          <conf-loc>Minneapolis, MN</conf-loc>
          <fpage>4171</fpage>
          <lpage>4186</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Ott</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Goyal</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Du</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Joshi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Levy</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Lewis</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Zettlemoyer</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Stoyanov</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>RoBERTa: a robustly optimized BERT pretraining approach</article-title>
          <source>arXiv</source>
          <access-date>2023-07-07</access-date>
          <comment>Preprint posted online July 26, 2019. <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1907.11692">https://arxiv.org/abs/1907.11692</ext-link>
          </comment>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
