<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="letter" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMIR</journal-id>
      <journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id>
      <journal-title>Journal of Medical Internet Research</journal-title>
      <issn pub-type="epub">1438-8871</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v25i1e40569</article-id>
      <article-id pub-id-type="pmid">36757756</article-id>
      <article-id pub-id-type="doi">10.2196/40569</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Research Letter</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Research Letter</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Pregex: Rule-Based Detection and Extraction of Twitter Data in Pregnancy</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Kukafka</surname>
            <given-names>Rita</given-names>
          </name>
        </contrib>
        <contrib contrib-type="editor">
          <name>
            <surname>Eysenbach</surname>
            <given-names>Gunther</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Zhao</surname>
            <given-names>Xinyan</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Foufi</surname>
            <given-names>Vasiliki</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author">
          <name name-style="western">
            <surname>Klein</surname>
            <given-names>Ari Z</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-8281-3464</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Kunatharaju</surname>
            <given-names>Shriya</given-names>
          </name>
          <degrees>BS</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-6042-1745</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>O'Connor</surname>
            <given-names>Karen</given-names>
          </name>
          <degrees>MS</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-7709-3813</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Gonzalez-Hernandez</surname>
            <given-names>Graciela</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <address>
            <institution>Department of Computational Biomedicine</institution>
            <institution>Cedars-Sinai Medical Center</institution>
            <addr-line>Pacific Design Center, Suite G549F</addr-line>
            <addr-line>700 North San Vicente Boulevard</addr-line>
            <addr-line>West Hollywood, CA, 90069</addr-line>
            <country>United States</country>
            <phone>1 310 423 3521</phone>
            <email>Graciela.GonzalezHernandez@csmc.edu</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-6416-9556</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Department of Biostatistics, Epidemiology, and Informatics</institution>
        <institution>Perelman School of Medicine</institution>
        <institution>University of Pennsylvania</institution>
        <addr-line>Philadelphia, PA</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Department of Computational Biomedicine</institution>
        <institution>Cedars-Sinai Medical Center</institution>
        <addr-line>West Hollywood, CA</addr-line>
        <country>United States</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Graciela Gonzalez-Hernandez <email>Graciela.GonzalezHernandez@csmc.edu</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <year>2023</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>9</day>
        <month>2</month>
        <year>2023</year>
      </pub-date>
      <volume>25</volume>
      <elocation-id>e40569</elocation-id>
      <history>
        <date date-type="received">
          <day>27</day>
          <month>6</month>
          <year>2022</year>
        </date>
        <date date-type="rev-request">
          <day>15</day>
          <month>8</month>
          <year>2022</year>
        </date>
        <date date-type="rev-recd">
          <day>2</day>
          <month>9</month>
          <year>2022</year>
        </date>
        <date date-type="accepted">
          <day>22</day>
          <month>1</month>
          <year>2023</year>
        </date>
      </history>
      <copyright-statement>©Ari Z Klein, Shriya Kunatharaju, Karen O'Connor, Graciela Gonzalez-Hernandez. Originally published in the Journal of Medical Internet Research (https://www.jmir.org), 09.02.2023.</copyright-statement>
      <copyright-year>2023</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research, is properly cited. The complete bibliographic information, a link to the original publication on https://www.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://www.jmir.org/2023/1/e40569" xlink:type="simple"/>
      <kwd-group>
        <kwd>natural language processing</kwd>
        <kwd>data mining</kwd>
        <kwd>social media</kwd>
        <kwd>pregnancy</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>Data on potential risk factors in pregnancy are limited. Meanwhile, in the United States, 17% of pregnancies end in fetal loss [<xref ref-type="bibr" rid="ref1">1</xref>], and birth defects and preterm births are the leading causes of infant mortality [<xref ref-type="bibr" rid="ref2">2</xref>]. In previous work [<xref ref-type="bibr" rid="ref3">3</xref>], we developed an automated natural language processing pipeline that identifies users who announced their pregnancy on Twitter and collects all of their tweets on an ongoing basis. We have also demonstrated that their tweets can be used for observational studies [<xref ref-type="bibr" rid="ref4">4</xref>-<xref ref-type="bibr" rid="ref6">6</xref>]. However, selecting users for such studies involves additional processing to address a limitation of our pipeline—namely, that many of the users refer to a pregnancy either that occurred prior to the availability of their tweets or for which we could not determine the prenatal period. To streamline the use of Twitter as a source of data, the objective of this study was to advance a downstream system developed in our previous work [<xref ref-type="bibr" rid="ref7">7</xref>] and evaluate its upstream use for identifying tweets that indicate the availability of Twitter data during pregnancy and can be used to extract dates marking the beginning and end of the 40-week prenatal period.</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Ethical Considerations</title>
        <p>The data used in this study were collected in accordance with the Twitter Terms of Service. The institutional review board of the University of Pennsylvania reviewed this study and deemed it exempt human subjects research under 45 CFR §46.101(b)(4) for publicly available data sources.</p>
      </sec>
      <sec>
        <title>Natural Language Processing System</title>
        <p>Our system, Pregex [<xref ref-type="bibr" rid="ref8">8</xref>], uses more than 100 handwritten regular expressions to search for tweets in which users indicate their gestational age or due date, including as units of time, days of the week, numeric and spelled-out dates, and linguistic markers. We took an iterative approach [<xref ref-type="bibr" rid="ref9">9</xref>] to develop the regular expressions, allowing us to actively reduce noise and account for ways that this information may be presented on Twitter, including in hashtags and with lexical variants [<xref ref-type="bibr" rid="ref10">10</xref>]. <xref ref-type="table" rid="table1">Table 1</xref> presents sample matching tweets.</p>
        <p>Pregex uses the <italic>dateutil</italic> Python package to apply an arithmetic operation to the tweets’ timestamp, based on the regular expression that the tweets match. For tweet 1, after replacing <italic>5 1/2 mos</italic> with <italic>5 months and 2 weeks</italic> in preprocessing, Pregex assigns the first digit group (<italic>5</italic>) to the <italic>months</italic> parameter of the <italic>relativedelta</italic> function and the second digit group (<italic>2</italic>) to the <italic>weeks</italic> parameter, subtracts this <italic>relativedelta</italic> from the timestamp to calculate the start date of the 40-week prenatal period, and then adds <italic>40 weeks</italic> to the start date to calculate the due date. For tweet 2, Pregex assigns <italic>Saturday</italic> to the <italic>weekday</italic> parameter to calculate the due date and then subtracts <italic>40 weeks</italic> from the due date to calculate the start date.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Sample tweets detected by Pregex (matching pattern in italics).</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="480"/>
            <col width="160"/>
            <col width="180"/>
            <col width="180"/>
            <thead>
              <tr valign="top">
                <td>Tweet</td>
                <td>Timestamp</td>
                <td>Pregnancy start</td>
                <td>Pregnancy end</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td><italic>I am 5 1/2 mos pregnant</italic> &#38; severely anemic. I had hypermesis in my first trimester.</td>
                <td>November 23, 2020</td>
                <td>June 9, 2020</td>
                <td>March 16, 2021</td>
              </tr>
              <tr valign="top">
                <td><italic>My due date is Saturday</italic> and I hope my baby boy is ready to come on out lol</td>
                <td>January 23, 2020</td>
                <td>April 20, 2019</td>
                <td>January 25, 2020</td>
              </tr>
              <tr valign="top">
                <td>Is October too early to have a baby shower when <italic>I’m due Feb 8th</italic>? I want a Halloween themed baby shower</td>
                <td>July 23, 2020</td>
                <td>May 4, 2020</td>
                <td>February 8, 2021</td>
              </tr>
              <tr valign="top">
                <td>I can’t wait until my pregnancy pillow comes, having the worst nights sleep <italic>#21weekspregnant</italic></td>
                <td>April 18, 2020</td>
                <td>November 23, 2019</td>
                <td>August 29, 2020</td>
              </tr>
              <tr valign="top">
                <td>i can’t believe <italic>i’m already half way through my pregnancy</italic>, this heat is really starting to get to me now</td>
                <td>June 19, 2021</td>
                <td>January 30, 2021</td>
                <td>November 6, 2021</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <p>We deployed Pregex on the Twitter timelines of more than 550,000 users—mostly users identified by our original pipeline [<xref ref-type="bibr" rid="ref3">3</xref>]—and detected approximately 235,000 tweets that were posted by more than 100,000 users. For validation, 3 annotators labeled a random sample of 4017 matching tweets—1 tweet per user and up to 100 tweets per regular expression—to identify whether they self-report an ongoing pregnancy and whether the correct beginning and end dates were extracted (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). Among these 4017 tweets, 3617 (90%) were dual annotated and 400 (10%) were annotated by all 3 annotators. For 381 (95%) of these 400 tweets, the 3 annotators agreed on whether the tweet self-reports an ongoing pregnancy, agreeing that 378 (99%) of these 381 tweets do. For 376 (99%) of these 378 tweets, the 3 annotators agreed on whether the correct beginning and end dates were extracted. After resolving disagreements among all 4017 tweets, we established that Pregex had a precision of 0.96 for identifying ongoing pregnancies, where <italic>precision = true positives / (true positives + false positives)</italic>. Among the 3875 true positives, Pregex had a precision of 0.99 for extracting dates marking the beginning and end of the 40-week prenatal period.</p>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <p>Because pregnancy is a common event, our rule-based approach can identify tweets during pregnancy with high precision and on a large scale, facilitating the use of Twitter as a complementary source of data for observational studies. In real time, Pregex is detecting approximately 50 new users daily, taking as input approximately 15,000 tweets returned from the Twitter streaming application programming interface that match pregnancy-related keywords derived from the regular expressions (<xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>). Among the 142 false-positive tweets in our evaluation that did not self-report an ongoing pregnancy, 42 (29%) mention a due date that refers to a deadline (eg, payments), which we will address in future work.</p>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Annotated tweets for evaluation.</p>
        <media xlink:href="jmir_v25i1e40569_app1.txt" xlink:title="TXT File, 184 KB"/>
      </supplementary-material>
      <supplementary-material id="app2">
        <label>Multimedia Appendix 2</label>
        <p>Pregnancy-related keywords for Twitter streaming application programming interface.</p>
        <media xlink:href="jmir_v25i1e40569_app2.txt" xlink:title="TXT File, 3 KB"/>
      </supplementary-material>
    </app-group>
    <ack>
      <p>The authors thank Ivan Flores for contributing to software applications, and Alexis Upshur for contributing to annotating the Twitter data. This study was supported by the National Library of Medicine (R01LM011176).</p>
    </ack>
    <fn-group>
      <fn fn-type="con">
        <p>AZK developed the regular expressions, contributed to software development, and wrote the paper. SK contributed to software development, annotated the Twitter data, performed the error analysis, and wrote the paper. KO developed the annotation guidelines, annotated the Twitter data, and edited the paper. GGH conceptualized the use of Twitter as a source of pregnancy data and edited the paper.</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ventura</surname>
              <given-names>SJ</given-names>
            </name>
            <name name-style="western">
              <surname>Curtin</surname>
              <given-names>SC</given-names>
            </name>
            <name name-style="western">
              <surname>Abma</surname>
              <given-names>JC</given-names>
            </name>
            <name name-style="western">
              <surname>Henshaw</surname>
              <given-names>SK</given-names>
            </name>
          </person-group>
          <article-title>Estimated pregnancy rates and rates of pregnancy outcomes for the United States, 1990-2008</article-title>
          <source>Natl Vital Stat Rep</source>
          <year>2012</year>
          <month>06</month>
          <day>20</day>
          <volume>60</volume>
          <issue>7</issue>
          <fpage>1</fpage>
          <lpage>21</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.cdc.gov/nchs/data/nvsr/nvsr60/nvsr60_07.pdf"/>
          </comment>
          <pub-id pub-id-type="medline">22970648</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>MacDorman</surname>
              <given-names>MF</given-names>
            </name>
            <name name-style="western">
              <surname>Gregory</surname>
              <given-names>ECW</given-names>
            </name>
          </person-group>
          <article-title>Fetal and Perinatal Mortality: United States, 2013</article-title>
          <source>Natl Vital Stat Rep</source>
          <year>2015</year>
          <month>07</month>
          <day>23</day>
          <volume>64</volume>
          <issue>8</issue>
          <fpage>1</fpage>
          <lpage>24</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.cdc.gov/nchs/data/nvsr/nvsr64/nvsr64_08.pdf"/>
          </comment>
          <pub-id pub-id-type="medline">26222771</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sarker</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Chandrashekar</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Magge</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Cai</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Klein</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Gonzalez</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Discovering cohorts of pregnant women from social media for safety surveillance and analysis</article-title>
          <source>J Med Internet Res</source>
          <year>2017</year>
          <month>10</month>
          <day>30</day>
          <volume>19</volume>
          <issue>10</issue>
          <fpage>e361</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2017/10/e361/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/jmir.8164</pub-id>
          <pub-id pub-id-type="medline">29084707</pub-id>
          <pub-id pub-id-type="pii">v19i10e361</pub-id>
          <pub-id pub-id-type="pmcid">PMC5684515</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Golder</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Chiuve</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Weissenbacher</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Klein</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>O'Connor</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Bland</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Malin</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Bhattacharya</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Scarazzini</surname>
              <given-names>LJ</given-names>
            </name>
            <name name-style="western">
              <surname>Gonzalez-Hernandez</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Pharmacoepidemiologic evaluation of birth defects from health-related postings in social media during pregnancy</article-title>
          <source>Drug Saf</source>
          <year>2019</year>
          <month>03</month>
          <volume>42</volume>
          <issue>3</issue>
          <fpage>389</fpage>
          <lpage>400</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/30284214"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s40264-018-0731-6</pub-id>
          <pub-id pub-id-type="medline">30284214</pub-id>
          <pub-id pub-id-type="pii">10.1007/s40264-018-0731-6</pub-id>
          <pub-id pub-id-type="pmcid">PMC6426821</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Klein</surname>
              <given-names>AZ</given-names>
            </name>
            <name name-style="western">
              <surname>O'Connor</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Gonzalez-Hernandez</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Toward using Twitter data to monitor COVID-19 vaccine safety in pregnancy: proof-of-concept study of cohort identification</article-title>
          <source>JMIR Form Res</source>
          <year>2022</year>
          <month>01</month>
          <day>06</day>
          <volume>6</volume>
          <issue>1</issue>
          <fpage>e33792</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://formative.jmir.org/2022/1/e33792/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/33792</pub-id>
          <pub-id pub-id-type="medline">34870607</pub-id>
          <pub-id pub-id-type="pii">v6i1e33792</pub-id>
          <pub-id pub-id-type="pmcid">PMC8734607</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Klein</surname>
              <given-names>AZ</given-names>
            </name>
            <name name-style="western">
              <surname>O'Connor</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Levine</surname>
              <given-names>LD</given-names>
            </name>
            <name name-style="western">
              <surname>Gonzalez-Hernandez</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Using Twitter data for cohort studies of drug safety in pregnancy: proof-of-concept with β-blockers</article-title>
          <source>JMIR Form Res</source>
          <year>2022</year>
          <month>06</month>
          <day>30</day>
          <volume>6</volume>
          <issue>6</issue>
          <fpage>e36771</fpage>
          <pub-id pub-id-type="doi">10.2196/36771</pub-id>
          <pub-id pub-id-type="medline">35771614</pub-id>
          <pub-id pub-id-type="pii">v6i6e36771</pub-id>
          <pub-id pub-id-type="pmcid">PMC9284350</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rouhizadeh</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Magge</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Klein</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Sarker</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Gonzalez</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>A rule-based approach to determining pregnancy timeframe from contextual social media postings</article-title>
          <source>Proceedings of the 2018 International Conference on Digital Health</source>
          <year>2018</year>
          <conf-name>DH '18</conf-name>
          <conf-date>April 23-26, 2018</conf-date>
          <conf-loc>Lyon, France</conf-loc>
          <fpage>16</fpage>
          <lpage>20</lpage>
          <pub-id pub-id-type="doi">10.1145/3194658.3194679</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="web">
          <article-title>Pregex</article-title>
          <source>Bitbucket</source>
          <access-date>2022-05-02</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bitbucket.org/pennhlp/pregex/">https://bitbucket.org/pennhlp/pregex/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Klein</surname>
              <given-names>AZ</given-names>
            </name>
            <name name-style="western">
              <surname>Sarker</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Cai</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Weissenbacher</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Gonzalez-Hernandez</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Social media mining for birth defects research: a rule-based, bootstrapping approach to collecting data for rare health-related events on Twitter</article-title>
          <source>J Biomed Inform</source>
          <year>2018</year>
          <month>11</month>
          <volume>87</volume>
          <fpage>68</fpage>
          <lpage>78</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(18)30197-7"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2018.10.001</pub-id>
          <pub-id pub-id-type="medline">30292855</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(18)30197-7</pub-id>
          <pub-id pub-id-type="pmcid">PMC6295660</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sarker</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Gonzalez-Hernandez</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>An unsupervised and customizable misspelling generator for mining noisy health-related text sources</article-title>
          <source>J Biomed Inform</source>
          <year>2018</year>
          <month>12</month>
          <volume>88</volume>
          <fpage>98</fpage>
          <lpage>107</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(18)30216-8"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2018.11.007</pub-id>
          <pub-id pub-id-type="medline">30445220</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(18)30216-8</pub-id>
          <pub-id pub-id-type="pmcid">PMC6322919</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
