<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="review-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMIR</journal-id>
      <journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id>
      <journal-title>Journal of Medical Internet Research</journal-title>
      <issn pub-type="epub">1438-8871</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v26i1e47923</article-id>
      <article-id pub-id-type="pmid">38488839</article-id>
      <article-id pub-id-type="doi">10.2196/47923</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Review</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Review</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Methods and Annotated Data Sets Used to Predict the Gender and Age of Twitter Users: Scoping Review</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Mavragani</surname>
            <given-names>Amaryllis</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Ru</surname>
            <given-names>Boshu</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Ni</surname>
            <given-names>Congning</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>O'Connor</surname>
            <given-names>Karen</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Department of Biostatistics, Epidemiology and Informatics</institution>
            <institution>Perelman School of Medicine</institution>
            <institution>University of Pennsylvania</institution>
            <addr-line>423 Guardian Dr</addr-line>
            <addr-line>Philadelphia, PA, 19004</addr-line>
            <country>United States</country>
            <phone>1 215 573 8089</phone>
            <email>karoc@pennmedicine.upenn.edu</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-7709-3813</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Golder</surname>
            <given-names>Su</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-8987-5211</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Weissenbacher</surname>
            <given-names>Davy</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-8331-3675</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Klein</surname>
            <given-names>Ari Z</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-8281-3464</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Magge</surname>
            <given-names>Arjun</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-4109-1346</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author">
          <name name-style="western">
            <surname>Gonzalez-Hernandez</surname>
            <given-names>Graciela</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-6416-9556</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Department of Biostatistics, Epidemiology and Informatics</institution>
        <institution>Perelman School of Medicine</institution>
        <institution>University of Pennsylvania</institution>
        <addr-line>Philadelphia, PA</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Department of Health Sciences</institution>
        <institution>University of York</institution>
        <addr-line>York</addr-line>
        <country>United Kingdom</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Department of Computational Biomedicine</institution>
        <institution>Cedars-Sinai Medical Center</institution>
        <addr-line>Los Angeles, CA</addr-line>
        <country>United States</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Karen O'Connor <email>karoc@pennmedicine.upenn.edu</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <year>2024</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>15</day>
        <month>3</month>
        <year>2024</year>
      </pub-date>
      <volume>26</volume>
      <elocation-id>e47923</elocation-id>
      <history>
        <date date-type="received">
          <day>5</day>
          <month>4</month>
          <year>2023</year>
        </date>
        <date date-type="rev-request">
          <day>5</day>
          <month>6</month>
          <year>2023</year>
        </date>
        <date date-type="rev-recd">
          <day>28</day>
          <month>7</month>
          <year>2023</year>
        </date>
        <date date-type="accepted">
          <day>1</day>
          <month>8</month>
          <year>2023</year>
        </date>
      </history>
      <copyright-statement>©Karen O'Connor, Su Golder, Davy Weissenbacher, Ari Z Klein, Arjun Magge, Graciela Gonzalez-Hernandez. Originally published in the Journal of Medical Internet Research (https://www.jmir.org), 15.03.2024.</copyright-statement>
      <copyright-year>2024</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research, is properly cited. The complete bibliographic information, a link to the original publication on https://www.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://www.jmir.org/2024/1/e47923" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Patient health data collected from a variety of nontraditional resources, commonly referred to as <italic>real-world data</italic>, can be a key information source for health and social science research. Social media platforms, such as Twitter (Twitter, Inc), offer vast amounts of real-world data. An important aspect of incorporating social media data in scientific research is identifying the demographic characteristics of the users who posted those data. Age and gender are considered key demographics for assessing the representativeness of the sample and enable researchers to study subgroups and disparities effectively. However, deciphering the age and gender of social media users poses challenges.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>This scoping review aims to summarize the existing literature on the prediction of the age and gender of Twitter users and provide an overview of the methods used.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>We searched 15 electronic databases and carried out reference checking to identify relevant studies that met our inclusion criteria: studies that predicted the age or gender of Twitter users using computational methods. The screening process was performed independently by 2 researchers to ensure the accuracy and reliability of the included studies.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>Of the initial 684 studies retrieved, 74 (10.8%) studies met our inclusion criteria. Among these 74 studies, 42 (57%) focused on predicting gender, 8 (11%) focused on predicting age, and 24 (32%) predicted a combination of both age and gender. Gender prediction was predominantly approached as a binary classification task, with the reported performance of the methods ranging from 0.58 to 0.96 <italic>F</italic><sub>1</sub>-score or 0.51 to 0.97 accuracy. Age prediction approaches varied in terms of classification groups, with a wider range of reported performance, ranging from 0.31 to 0.94 <italic>F</italic><sub>1</sub>-score or 0.43 to 0.86 accuracy. The heterogeneous nature of the studies and the reporting of dissimilar performance metrics made it challenging to quantitatively synthesize results and draw definitive conclusions.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>Our review found that although automated methods for predicting the age and gender of Twitter users have evolved to incorporate techniques such as deep neural networks, a significant proportion of the attempts rely on traditional machine learning methods, suggesting that there is potential to improve the performance of these tasks by using more advanced methods. Gender prediction has generally achieved a higher reported performance than age prediction. However, the lack of standardized reporting of performance metrics or standard annotated corpora to evaluate the methods used hinders any meaningful comparison of the approaches. Potential biases stemming from the collection and labeling of data used in the studies were identified as a problem, emphasizing the need for careful consideration and mitigation of biases in future studies. This scoping review provides valuable insights into the methods used for predicting the age and gender of Twitter users, along with the challenges and considerations associated with these methods.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>social media</kwd>
        <kwd>demographics</kwd>
        <kwd>Twitter</kwd>
        <kwd>age</kwd>
        <kwd>gender</kwd>
        <kwd>prediction</kwd>
        <kwd>real-world data</kwd>
        <kwd>neural network</kwd>
        <kwd>machine learning</kwd>
        <kwd>gender prediction</kwd>
        <kwd>age prediction</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <sec>
        <title>Background</title>
        <p>Real-world data are data regarding patients’ health collected outside randomized controlled trials from a variety of nontraditional resources such as electronic health records, medical claims data, or data generated by patients themselves such as social media data that may be used to support study design to develop real-world evidence [<xref ref-type="bibr" rid="ref1">1</xref>]. Real-world data from social media have been increasingly recognized as a valuable resource for gaining knowledge about and insight into a variety of health-related research topics, including disease surveillance [<xref ref-type="bibr" rid="ref2">2</xref>,<xref ref-type="bibr" rid="ref3">3</xref>], pharmacovigilance [<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref5">5</xref>], and mental health [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref7">7</xref>]. They can also be used for the identification of cohorts for potential recruitment into traditional studies [<xref ref-type="bibr" rid="ref8">8</xref>,<xref ref-type="bibr" rid="ref9">9</xref>]. In short, social media can readily provide abundant personal health information in real time.</p>
        <p>The use of data from social media platforms, particularly Twitter (Twitter, Inc), for health-related research is subject to some inherent limitations in that demographic information (with the exception of location, which is available when the user has enabled the location feature) is not explicitly available through the application programming interface (API) [<xref ref-type="bibr" rid="ref10">10</xref>]. Demographic traits, including age, gender, race or ethnicity, location, education, and income, hold significant value in health research. Few studies based on Twitter data incorporated an assessment of Twitter user demographics into their analysis [<xref ref-type="bibr" rid="ref11">11</xref>]. Understanding the demographic traits of Twitter users provides significant value when using the data in health research. It not only facilitates sample representativeness, which is crucial for generalizing research findings and ensuring that the conclusions drawn from Twitter data can be extrapolated to broader populations [<xref ref-type="bibr" rid="ref12">12</xref>], but also enables subgroup analysis. It allows for the comparison of health-related behaviors, attitudes, and outcomes across different groups and enables targeted interventions and tailored health care strategies [<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref14">14</xref>]. Moreover, demographic information is actionable and can assist in designing public health interventions and policies for specific populations based on their needs and concerns as expressed on social media.</p>
        <p>Predicting demographic traits is complex and challenging. A user’s profile does not necessarily include such information, and researchers have used other features available in the data, such as names, content of the tweets, or the individual’s network to make predictions. A 2018 systematic review assessed the use of social media to predict demographic traits, finding successful implementation for 14 traits, including gender and age [<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref16">16</xref>]. Although the review provided a broad overview of the state of demographic prediction using social media, the details of the machine learning (ML) methods used were not reviewed. A recent review provided insights into the methods used for predicting the race and ethnicity of Twitter users [<xref ref-type="bibr" rid="ref17">17</xref>].</p>
      </sec>
      <sec>
        <title>Objectives</title>
        <p>In this study, our objective was to present a scoping review of automated methods used for predicting the age and gender of Twitter users to provide an overview of the techniques published since 2017. We focused our review on studies that used Twitter, as it is the most commonly used social media platform for this research [<xref ref-type="bibr" rid="ref15">15</xref>]. Twitter is an attractive platform to use in research, as the terms of use for this platform are well understood by both users and researchers, it includes an API, and the data on it are abundant for health-related research [<xref ref-type="bibr" rid="ref18">18</xref>].</p>
        <p>Although other demographic traits such as location, education, and income can provide valuable insights, the age and gender of Twitter users present distinct advantages and considerations for health research. Given the differences in disease presentation by gender, such as with acute coronary syndrome [<xref ref-type="bibr" rid="ref19">19</xref>], and by age, such as with COVID-19 [<xref ref-type="bibr" rid="ref20">20</xref>], identifying the age and gender of the users included in studies using Twitter data may elicit insights into disease prevalence, patterns, and variations across different subgroups in disease presentation or treatment response [<xref ref-type="bibr" rid="ref21">21</xref>,<xref ref-type="bibr" rid="ref22">22</xref>]. Age and gender also play crucial roles in shaping health behaviors and attitudes. For example, studying age and gender differences in smoking habits [<xref ref-type="bibr" rid="ref23">23</xref>], physical activity levels [<xref ref-type="bibr" rid="ref24">24</xref>], and adherence to medical treatments [<xref ref-type="bibr" rid="ref25">25</xref>,<xref ref-type="bibr" rid="ref26">26</xref>] can provide insights into effective interventions and health promotion campaigns for specific groups. Although Twitter users are generally representative of the population, there is a certain degree of skew in their demographics: there is an overrepresentation of individuals aged &#60;30 years, whereas individuals aged &#62;65 years are underrepresented when compared with the overall demographics of the US population [<xref ref-type="bibr" rid="ref27">27</xref>,<xref ref-type="bibr" rid="ref28">28</xref>]. Therefore, it is important to include the age and gender of Twitter users in a study to enable the accurate reporting of findings, making them specific to certain subgroups, or to make any necessary adjustments to account for potential biases that may arise from these demographic differences.</p>
        <p>Although studies aimed at predicting Twitter users’ gender began as early as 2011 [<xref ref-type="bibr" rid="ref29">29</xref>-<xref ref-type="bibr" rid="ref33">33</xref>] and efforts aimed at detecting the age of Twitter users have been made since 2013 [<xref ref-type="bibr" rid="ref34">34</xref>-<xref ref-type="bibr" rid="ref36">36</xref>], it is only since 2017 that the language processing community has shifted its methods away from handcrafted rules and toward representing text documents with dense vectors to train deep neural networks (DNNs) [<xref ref-type="bibr" rid="ref37">37</xref>,<xref ref-type="bibr" rid="ref38">38</xref>], resulting in a noticeable increase in performance for many applications. We sought to examine whether these increases in performance were evident in the methods used for the prediction of the age and gender of Twitter users.</p>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Overview</title>
        <p>We report this review following the PRISMA-ScR (Preferred Reporting Items for Systematic Reviews and Meta-Analyses Extension for Scoping Reviews) [<xref ref-type="bibr" rid="ref39">39</xref>] methodology. The completed PRISMA-ScR checklist is available in Table S1 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>. We searched several databases to identify studies on the prediction of Twitter users’ age or gender or both. Our database search strategy combined 3 facets: facet 1 included terms related to Twitter, facet 2 consisted of terms for age or gender, and facet 3 consisted of terms for methods of prediction such as ML. The search strategy was translated as appropriate for each database. The detailed search strategy is available in <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>. The ML term facet was expanded using terms from related reviews by Hinds and Joinson [<xref ref-type="bibr" rid="ref15">15</xref>] and Umar et al [<xref ref-type="bibr" rid="ref40">40</xref>]. The search criteria were limited to peer-reviewed journals, conference proceedings, books, and theses.</p>
        <p>The following databases were searched with a publication date range of 2017 or later (<xref ref-type="boxed-text" rid="box1">Textbox 1</xref>).</p>
        <boxed-text id="box1" position="float">
          <title>List of databases searched with the total number of combined facet results.</title>
          <list list-type="bullet">
            <list-item>
              <p>ACL (Association for Computational Linguistics) Anthology: 5080, of which the first 50 records were screened</p>
            </list-item>
            <list-item>
              <p>ACM (Association for Computing Machinery) Digital Library: 23</p>
            </list-item>
            <list-item>
              <p>Cumulative Index to Nursing &#38; Allied Health (CINAHL): 57</p>
            </list-item>
            <list-item>
              <p>Embase: 262</p>
            </list-item>
            <list-item>
              <p>Google Scholar: 767,000, of which the first 50 records were screened</p>
            </list-item>
            <list-item>
              <p>IEEE (Institute of Electrical and Electronics Engineers) Xplore: 23</p>
            </list-item>
            <list-item>
              <p>Library and Information Science Abstracts: 31</p>
            </list-item>
            <list-item>
              <p>Library, Information Science and Technology Abstracts: 48</p>
            </list-item>
            <list-item>
              <p>ProQuest Dissertations and Theses—United Kingdom and Ireland: 58</p>
            </list-item>
            <list-item>
              <p>Ovid MEDLINE: 183</p>
            </list-item>
            <list-item>
              <p>PsycINFO: 104</p>
            </list-item>
            <list-item>
              <p>Science Citation Index, Social Science Citation Index, Conference Proceedings Citation Index—Science, and Conference Proceedings Citation Index—Social Science and Humanities: 131</p>
            </list-item>
            <list-item>
              <p>Zetoc: 61</p>
            </list-item>
          </list>
        </boxed-text>
        <p>Citations were exported to a shared EndNote (Clarivate) library for deduplication. Using the Population, Intervention, Comparison, Outcomes, and Study Design (PICOS) [<xref ref-type="bibr" rid="ref41">41</xref>] framework, we developed a list of inclusion and exclusion criteria (refer to the <italic>Inclusion and Exclusion Criteria</italic> section), and 2 screeners from the research team screened the results independently, with disputes discussed after screening and a consensus decision reached. In addition, given that search engines and unmanageable data sources are recommended to be included as secondary data sources [<xref ref-type="bibr" rid="ref42">42</xref>-<xref ref-type="bibr" rid="ref44">44</xref>], the first 50 records from both ACL (Association for Computational Linguistics) Anthology and Google Scholar were screened using the aforementioned methods. We set a limit on the number of results screened, as the relevance of the results is ranked by the search engines, with the most relevant results listed first [<xref ref-type="bibr" rid="ref45">45</xref>-<xref ref-type="bibr" rid="ref48">48</xref>].</p>
      </sec>
      <sec>
        <title>Inclusion and Exclusion Criteria</title>
        <p>We framed our research question using the PICOS framework. <xref ref-type="table" rid="table1">Table 1</xref> outlines our specific inclusion and exclusion criteria. As explained in the <italic>Introduction</italic> section, we restricted the date of our search to include only publications from 2017 and beyond. No language restrictions were applied to the inclusion criteria; however, financial and logistical constraints allowed us to include only studies written in English, Spanish, Chinese, or French.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Inclusion and exclusion criteria, developed per the Population, Intervention, Comparison, Outcomes, and Study Design framework, for the scoping review.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="100"/>
            <col width="530"/>
            <col width="370"/>
            <thead>
              <tr valign="top">
                <td>Facet</td>
                <td>Inclusion criteria</td>
                <td>Exclusion criteria</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Population</td>
                <td>Any Twitter (Twitter, Inc) data on Twitter users, such as posts, profile details, photos, or avatars</td>
                <td>Studies evaluating prediction from data on other social media platforms, such as Facebook (Meta Platforms, Inc) or Instagram (Meta Platforms, Inc)</td>
              </tr>
              <tr valign="top">
                <td>Intervention</td>
                <td>Methods for predicting the gender or age of Twitter users; articles that used machine learning, natural language processing, human in the loop, or other computationally assisted methods to predict the gender or age of the users</td>
                <td>Studies that contained no computational methods</td>
              </tr>
              <tr valign="top">
                <td>Comparator</td>
                <td>Any or none; we included any studies irrespective of whether they had a comparator and, if they did have a comparator, irrespective of what that was</td>
                <td>N/A<sup>a</sup></td>
              </tr>
              <tr valign="top">
                <td>Outcome</td>
                <td>Gender or age prediction</td>
                <td>Any other demographic trait prediction</td>
              </tr>
              <tr valign="top">
                <td>Study design</td>
                <td>Any type of peer-reviewed study reporting on the methods used to predict gender or age; such information must be the primary focus of the study or reported in enough detail to be reproducible</td>
                <td>Discussion papers, commentaries, and letters</td>
              </tr>
              <tr valign="top">
                <td>Date</td>
                <td>2017 or later</td>
                <td>Before 2017</td>
              </tr>
              <tr valign="top">
                <td>Language</td>
                <td>All</td>
                <td>None</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>N/A: not applicable.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Data Extraction</title>
        <p>From each included paper, we extracted the following data: the year of publication, publication type (journal, conference paper, book chapter, or thesis), demographic predicted (gender, age, or both), language of tweets, size of the data set, collection method for the data set, details of prediction models, features used in the models (posts, profile, and images), performance of the models, name of any software used for prediction, measures used to assess the methods and results of any evaluation, and the availability of data or code. The included papers were distributed among the authors for data extraction. The extracted data were validated by another author (KO).</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Overview</title>
        <p>Our database searches resulted in 981 studies, which were retrieved and entered into an EndNote library, where duplicates were removed, leaving 684 (69.7%) studies for sifting.</p>
        <p>After the abstract review, 172 (25.1%) of 684 studies were deemed potentially relevant by either one of the independent sifters (SG and KO). The full texts of these studies were screened independently, and disagreements were discussed, resulting in the inclusion of 74 (43%) studies [<xref ref-type="bibr" rid="ref49">49</xref>-<xref ref-type="bibr" rid="ref122">122</xref>] and exclusion of 98 (57%) studies (<xref rid="figure1" ref-type="fig">Figure 1</xref>).</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>PRISMA (Preferred Reporting Items for Systematic Reviews and Meta-Analyses) flow diagram of the included studies.</p>
          </caption>
          <graphic xlink:href="jmir_v26i1e47923_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Characteristics of the Included Studies</title>
        <p>Among the 74 included studies (<xref ref-type="supplementary-material" rid="app3">Multimedia Appendices 3</xref> [<xref ref-type="bibr" rid="ref49">49</xref>-<xref ref-type="bibr" rid="ref52">52</xref>,<xref ref-type="bibr" rid="ref54">54</xref>-<xref ref-type="bibr" rid="ref63">63</xref>,<xref ref-type="bibr" rid="ref65">65</xref>,<xref ref-type="bibr" rid="ref67">67</xref>-<xref ref-type="bibr" rid="ref72">72</xref>,<xref ref-type="bibr" rid="ref74">74</xref>-<xref ref-type="bibr" rid="ref89">89</xref>,<xref ref-type="bibr" rid="ref91">91</xref>-<xref ref-type="bibr" rid="ref93">93</xref>,<xref ref-type="bibr" rid="ref96">96</xref>-<xref ref-type="bibr" rid="ref99">99</xref>,<xref ref-type="bibr" rid="ref101">101</xref>-<xref ref-type="bibr" rid="ref122">122</xref>] and <xref ref-type="supplementary-material" rid="app4">4</xref> [<xref ref-type="bibr" rid="ref51">51</xref>,<xref ref-type="bibr" rid="ref53">53</xref>,<xref ref-type="bibr" rid="ref55">55</xref>,<xref ref-type="bibr" rid="ref56">56</xref>,<xref ref-type="bibr" rid="ref59">59</xref>,<xref ref-type="bibr" rid="ref60">60</xref>,<xref ref-type="bibr" rid="ref63">63</xref>-<xref ref-type="bibr" rid="ref67">67</xref>,<xref ref-type="bibr" rid="ref70">70</xref>,<xref ref-type="bibr" rid="ref73">73</xref>,<xref ref-type="bibr" rid="ref74">74</xref>,<xref ref-type="bibr" rid="ref77">77</xref>,<xref ref-type="bibr" rid="ref80">80</xref>,<xref ref-type="bibr" rid="ref83">83</xref>,<xref ref-type="bibr" rid="ref84">84</xref>,<xref ref-type="bibr" rid="ref87">87</xref>,<xref ref-type="bibr" rid="ref90">90</xref>,<xref ref-type="bibr" rid="ref94">94</xref>, <xref ref-type="bibr" rid="ref95">95</xref>,<xref ref-type="bibr" rid="ref99">99</xref>-<xref ref-type="bibr" rid="ref101">101</xref>,<xref ref-type="bibr" rid="ref108">108</xref>,<xref ref-type="bibr" rid="ref110">110</xref>,<xref ref-type="bibr" rid="ref112">112</xref>,<xref ref-type="bibr" rid="ref116">116</xref>,<xref ref-type="bibr" 
rid="ref118">118</xref>-<xref ref-type="bibr" rid="ref120">120</xref>]), the majority (n=42, 57%) focused on predicting only the gender of the individual, 24 (32%) explored predicting both gender and age, and 8 (11%) focused solely on predicting age. Most of the studies were published in conference proceedings (44/74, 59%), followed by journal articles (28/74, 38%), theses (2/74, 3%), and a book chapter (1/74, 1%).</p>
        <p>In 42 (57%) of the 74 studies, developing methods to predict Twitter users’ age or gender or both was the primary purpose. In the remaining studies (32/74, 43%), the identification of the demographic characteristics of Twitter users was secondary. Within this last group, 9 (28%) studies developed ad-hoc methods to determine age, gender, or both, whereas the others used open-source models (13/32, 41%) or off-the-shelf software (10/32, 31%).</p>
      </sec>
      <sec>
        <title>Studies Developing Ad-Hoc Methods for Gender and Age Prediction</title>
        <sec>
          <title>Gender</title>
          <sec>
            <title>Overview</title>
            <p>Of the 74 studies, 44 (59%) developed ad-hoc methods to predict the Twitter users’ gender. Of these 44 studies, 32 (73%) predicted the users’ gender alone [<xref ref-type="bibr" rid="ref49">49</xref>,<xref ref-type="bibr" rid="ref50">50</xref>,<xref ref-type="bibr" rid="ref52">52</xref>,<xref ref-type="bibr" rid="ref54">54</xref>,<xref ref-type="bibr" rid="ref57">57</xref>,<xref ref-type="bibr" rid="ref58">58</xref>,<xref ref-type="bibr" rid="ref68">68</xref>,<xref ref-type="bibr" rid="ref69">69</xref>, <xref ref-type="bibr" rid="ref71">71</xref>,<xref ref-type="bibr" rid="ref72">72</xref>,<xref ref-type="bibr" rid="ref75">75</xref>,<xref ref-type="bibr" rid="ref76">76</xref>,<xref ref-type="bibr" rid="ref79">79</xref>,<xref ref-type="bibr" rid="ref81">81</xref>,<xref ref-type="bibr" rid="ref82">82</xref>,<xref ref-type="bibr" rid="ref85">85</xref>,<xref ref-type="bibr" rid="ref86">86</xref>,<xref ref-type="bibr" rid="ref89">89</xref>,<xref ref-type="bibr" rid="ref92">92</xref>,<xref ref-type="bibr" rid="ref93">93</xref>,<xref ref-type="bibr" rid="ref96">96</xref>,<xref ref-type="bibr" rid="ref102">102</xref>,<xref ref-type="bibr" rid="ref104">104</xref>-<xref ref-type="bibr" rid="ref107">107</xref>,<xref ref-type="bibr" rid="ref111">111</xref>,<xref ref-type="bibr" rid="ref113">113</xref>, <xref ref-type="bibr" rid="ref115">115</xref>,<xref ref-type="bibr" rid="ref117">117</xref>,<xref ref-type="bibr" rid="ref121">121</xref>,<xref ref-type="bibr" rid="ref122">122</xref>], and 12 (27%) predicted gender along with age [<xref ref-type="bibr" rid="ref51">51</xref>,<xref ref-type="bibr" rid="ref55">55</xref>,<xref ref-type="bibr" rid="ref65">65</xref>,<xref ref-type="bibr" rid="ref70">70</xref>,<xref ref-type="bibr" rid="ref80">80</xref>,<xref ref-type="bibr" rid="ref83">83</xref>,<xref ref-type="bibr" rid="ref87">87</xref>,<xref ref-type="bibr" rid="ref101">101</xref>,<xref ref-type="bibr" rid="ref108">108</xref>,<xref ref-type="bibr" 
rid="ref110">110</xref>,<xref ref-type="bibr" rid="ref112">112</xref>,<xref ref-type="bibr" rid="ref116">116</xref>].</p>
            <p>Most studies that developed ad-hoc methods (41/44, 93%) approached the problem of gender prediction as a binary classification task, predicting whether the label male or female applies to each user account, whereas 7% (3/44) of studies [<xref ref-type="bibr" rid="ref93">93</xref>,<xref ref-type="bibr" rid="ref112">112</xref>,<xref ref-type="bibr" rid="ref119">119</xref>] added the classification of organization or brand.</p>
            <p>We found that approaches to predict gender included tweets written in multiple languages, including English [<xref ref-type="bibr" rid="ref52">52</xref>,<xref ref-type="bibr" rid="ref82">82</xref>,<xref ref-type="bibr" rid="ref83">83</xref>,<xref ref-type="bibr" rid="ref92">92</xref>,<xref ref-type="bibr" rid="ref93">93</xref>,<xref ref-type="bibr" rid="ref115">115</xref>,<xref ref-type="bibr" rid="ref117">117</xref>], German [<xref ref-type="bibr" rid="ref76">76</xref>], Slovenian [<xref ref-type="bibr" rid="ref106">106</xref>], Italian [<xref ref-type="bibr" rid="ref49">49</xref>], Japanese [<xref ref-type="bibr" rid="ref89">89</xref>], Arabic and Egyptian [<xref ref-type="bibr" rid="ref57">57</xref>,<xref ref-type="bibr" rid="ref58">58</xref>,<xref ref-type="bibr" rid="ref79">79</xref>], French, Dutch, Portuguese, and Spanish, and a multilingual study assessed tweets written in 28 languages and dialects [<xref ref-type="bibr" rid="ref112">112</xref>].</p>
          </sec>
          <sec>
            <title>Data Sets</title>
            <p>For the training and validation of the ad-hoc approaches for gender detection, some studies (19/44, 43%) used previously created annotated corpora, whereas others (27/44, 61%) collected data directly from Twitter. Among the 19 studies that used previously annotated data sets, 9 (47%) [<xref ref-type="bibr" rid="ref55">55</xref>,<xref ref-type="bibr" rid="ref57">57</xref>,<xref ref-type="bibr" rid="ref58">58</xref>,<xref ref-type="bibr" rid="ref68">68</xref>,<xref ref-type="bibr" rid="ref70">70</xref>,<xref ref-type="bibr" rid="ref86">86</xref>,<xref ref-type="bibr" rid="ref87">87</xref>,<xref ref-type="bibr" rid="ref96">96</xref>,<xref ref-type="bibr" rid="ref121">121</xref>] used corpora from the PAN-Conference and Labs of the Evaluation Forum (CLEF; PAN-CLEF) author profiling tasks [<xref ref-type="bibr" rid="ref123">123</xref>-<xref ref-type="bibr" rid="ref129">129</xref>], whereas 10 (53%) studies [<xref ref-type="bibr" rid="ref72">72</xref>,<xref ref-type="bibr" rid="ref75">75</xref>,<xref ref-type="bibr" rid="ref83">83</xref>,<xref ref-type="bibr" rid="ref85">85</xref>,<xref ref-type="bibr" rid="ref93">93</xref>,<xref ref-type="bibr" rid="ref104">104</xref>,<xref ref-type="bibr" rid="ref105">105</xref>,<xref ref-type="bibr" rid="ref115">115</xref>,<xref ref-type="bibr" rid="ref117">117</xref>,<xref ref-type="bibr" rid="ref122">122</xref>] relied on data sets from other studies [<xref ref-type="bibr" rid="ref113">113</xref>,<xref ref-type="bibr" rid="ref130">130</xref>-<xref ref-type="bibr" rid="ref136">136</xref>].</p>
            <p>In the 27 (61%) studies that collected data directly from Twitter, different components of Twitter accounts were used. These components were used either for manually or semiautomatically validating the gender of a user or for computing features describing the user to train a classifier (<xref ref-type="supplementary-material" rid="app5">Multimedia Appendix 5</xref> [<xref ref-type="bibr" rid="ref49">49</xref>-<xref ref-type="bibr" rid="ref122">122</xref>]). Despite data limitations from the Twitter API, it was the main source of data collection, with 22 (81%) studies [<xref ref-type="bibr" rid="ref49">49</xref>-<xref ref-type="bibr" rid="ref52">52</xref>,<xref ref-type="bibr" rid="ref54">54</xref>,<xref ref-type="bibr" rid="ref69">69</xref>,<xref ref-type="bibr" rid="ref71">71</xref>,<xref ref-type="bibr" rid="ref76">76</xref>,<xref ref-type="bibr" rid="ref79">79</xref>,<xref ref-type="bibr" rid="ref81">81</xref>,<xref ref-type="bibr" rid="ref89">89</xref>,<xref ref-type="bibr" rid="ref92">92</xref>,<xref ref-type="bibr" rid="ref101">101</xref>,<xref ref-type="bibr" rid="ref102">102</xref>,<xref ref-type="bibr" rid="ref106">106</xref>-<xref ref-type="bibr" rid="ref108">108</xref>,<xref ref-type="bibr" rid="ref110">110</xref>,<xref ref-type="bibr" rid="ref111">111</xref>,<xref ref-type="bibr" rid="ref116">116</xref>,<xref ref-type="bibr" rid="ref117">117</xref>,<xref ref-type="bibr" rid="ref121">121</xref>] collecting data either as a random sample from the Twitter Streaming API or based on keywords or geographic location from the Twitter Search API. 
Of the 5 studies not using the Twitter API, 1 (20%) [<xref ref-type="bibr" rid="ref82">82</xref>] collected data using a scraping tool, 3 (60%) [<xref ref-type="bibr" rid="ref80">80</xref>,<xref ref-type="bibr" rid="ref112">112</xref>,<xref ref-type="bibr" rid="ref113">113</xref>] used a random sample from a collection of 10% of tweets from 2014 to 2017 or the Twitter archive, and 1 (20%) did not specify its data source [<xref ref-type="bibr" rid="ref65">65</xref>].</p>
            <p>The 24 studies that created a labeled data set (<xref ref-type="supplementary-material" rid="app6">Multimedia Appendix 6</xref> [<xref ref-type="bibr" rid="ref49">49</xref>,<xref ref-type="bibr" rid="ref51">51</xref>-<xref ref-type="bibr" rid="ref54">54</xref>,<xref ref-type="bibr" rid="ref63">63</xref>,<xref ref-type="bibr" rid="ref64">64</xref>,<xref ref-type="bibr" rid="ref66">66</xref>,<xref ref-type="bibr" rid="ref69">69</xref>,<xref ref-type="bibr" rid="ref71">71</xref>,<xref ref-type="bibr" rid="ref73">73</xref>,<xref ref-type="bibr" rid="ref76">76</xref>,<xref ref-type="bibr" rid="ref77">77</xref>,<xref ref-type="bibr" rid="ref80">80</xref>,<xref ref-type="bibr" rid="ref82">82</xref>,<xref ref-type="bibr" rid="ref89">89</xref>,<xref ref-type="bibr" rid="ref90">90</xref>,<xref ref-type="bibr" rid="ref92">92</xref>, <xref ref-type="bibr" rid="ref106">106</xref>-<xref ref-type="bibr" rid="ref108">108</xref>,<xref ref-type="bibr" rid="ref110">110</xref>,<xref ref-type="bibr" rid="ref112">112</xref>,<xref ref-type="bibr" rid="ref113">113</xref>,<xref ref-type="bibr" rid="ref116">116</xref>-<xref ref-type="bibr" rid="ref118">118</xref>,<xref ref-type="bibr" rid="ref120">120</xref>]) to train and test or to validate the performance of the system determined the gender of the users using multiple components of their Twitter accounts (<xref ref-type="supplementary-material" rid="app5">Multimedia Appendix 5</xref>). 
A total of 11 (46%) studies labeled the data through manual annotation, where the annotators determined the gender using profile pictures [<xref ref-type="bibr" rid="ref52">52</xref>,<xref ref-type="bibr" rid="ref54">54</xref>], user names [<xref ref-type="bibr" rid="ref71">71</xref>], profiles [<xref ref-type="bibr" rid="ref89">89</xref>], or a combination of these [<xref ref-type="bibr" rid="ref76">76</xref>,<xref ref-type="bibr" rid="ref82">82</xref>,<xref ref-type="bibr" rid="ref92">92</xref>,<xref ref-type="bibr" rid="ref106">106</xref>,<xref ref-type="bibr" rid="ref108">108</xref>,<xref ref-type="bibr" rid="ref110">110</xref>,<xref ref-type="bibr" rid="ref116">116</xref>]. There were 11 (46%) studies that automatically or semiautomatically labeled their data sets via the detection of self-reports or gender-identifying terms (eg, mother, son, and uncle) [<xref ref-type="bibr" rid="ref69">69</xref>,<xref ref-type="bibr" rid="ref80">80</xref>,<xref ref-type="bibr" rid="ref108">108</xref>,<xref ref-type="bibr" rid="ref110">110</xref>,<xref ref-type="bibr" rid="ref112">112</xref>,<xref ref-type="bibr" rid="ref117">117</xref>], the user’s name [<xref ref-type="bibr" rid="ref49">49</xref>,<xref ref-type="bibr" rid="ref107">107</xref>,<xref ref-type="bibr" rid="ref113">113</xref>], or declarations on other linked social media [<xref ref-type="bibr" rid="ref116">116</xref>,<xref ref-type="bibr" rid="ref117">117</xref>]. A total of 3 (13%) studies created their labeled data sets by using the accounts of famous social media influencers [<xref ref-type="bibr" rid="ref65">65</xref>] or using an unspecified collection of users whose gender is known [<xref ref-type="bibr" rid="ref51">51</xref>,<xref ref-type="bibr" rid="ref79">79</xref>]. Of the 24 studies, only 8 (33%) reported data availability. 
Of the 8 studies, 6 (75%) stated availability <italic>by request</italic>, and 2 (25%) had working links to the whole corpus (<xref ref-type="supplementary-material" rid="app6">Multimedia Appendix 6</xref>).</p>
          </sec>
          <sec>
            <title>Nonpersonal Accounts</title>
            <p>A Twitter account may not be authored by or represent a single person. There are organization or company accounts as well as <italic>bot</italic> accounts. A bot is an automatic or semiautomatic user account. Some bot accounts identify themselves as such and may be used to automatically amplify news or tweets related to a certain topic. Others may emulate human accounts and be used with a more malicious intent to sow discord, manipulate public opinion, or spread misinformation. There were 9 (12%) of the 74 included studies [<xref ref-type="bibr" rid="ref49">49</xref>,<xref ref-type="bibr" rid="ref76">76</xref>,<xref ref-type="bibr" rid="ref92">92</xref>,<xref ref-type="bibr" rid="ref93">93</xref>,<xref ref-type="bibr" rid="ref96">96</xref>,<xref ref-type="bibr" rid="ref103">103</xref>,<xref ref-type="bibr" rid="ref104">104</xref>,<xref ref-type="bibr" rid="ref106">106</xref>,<xref ref-type="bibr" rid="ref112">112</xref>] that removed nonpersonal (organization) accounts when they manually annotated their collections. Some studies (11/74, 15%) implemented heuristics to explicitly detect and remove nonpersonal accounts [<xref ref-type="bibr" rid="ref49">49</xref>,<xref ref-type="bibr" rid="ref50">50</xref>,<xref ref-type="bibr" rid="ref59">59</xref>,<xref ref-type="bibr" rid="ref71">71</xref>,<xref ref-type="bibr" rid="ref81">81</xref>,<xref ref-type="bibr" rid="ref107">107</xref>,<xref ref-type="bibr" rid="ref113">113</xref>,<xref ref-type="bibr" rid="ref122">122</xref>], bot accounts [<xref ref-type="bibr" rid="ref98">98</xref>], or both [<xref ref-type="bibr" rid="ref79">79</xref>,<xref ref-type="bibr" rid="ref137">137</xref>]. Others (39/74, 53%) used previously annotated data sets consisting of only personal accounts, labeled and removed nonpersonal accounts, or collected their data sets based on self-reports of age and gender or other identifiable personal information. 
The remaining (15/74, 20%) studies provided no details on how or whether these accounts were removed (<xref ref-type="supplementary-material" rid="app5">Multimedia Appendix 5</xref>).</p>
          </sec>
          <sec>
            <title>Features and Models</title>
            <p>The reviewed studies used data labeled with the user’s gender to build and evaluate classification models based on features describing the tweets (such as n-grams, word embeddings, hashtags, and URLs) [<xref ref-type="bibr" rid="ref57">57</xref>,<xref ref-type="bibr" rid="ref58">58</xref>,<xref ref-type="bibr" rid="ref65">65</xref>,<xref ref-type="bibr" rid="ref68">68</xref>-<xref ref-type="bibr" rid="ref71">71</xref>,<xref ref-type="bibr" rid="ref75">75</xref>,<xref ref-type="bibr" rid="ref79">79</xref>,<xref ref-type="bibr" rid="ref82">82</xref>,<xref ref-type="bibr" rid="ref86">86</xref>,<xref ref-type="bibr" rid="ref87">87</xref>,<xref ref-type="bibr" rid="ref92">92</xref>, <xref ref-type="bibr" rid="ref96">96</xref>,<xref ref-type="bibr" rid="ref104">104</xref>,<xref ref-type="bibr" rid="ref109">109</xref>,<xref ref-type="bibr" rid="ref113">113</xref>,<xref ref-type="bibr" rid="ref121">121</xref>], features derived from the users’ profile metadata (such as user names, bio, followers, and users followed) [<xref ref-type="bibr" rid="ref49">49</xref>,<xref ref-type="bibr" rid="ref51">51</xref>,<xref ref-type="bibr" rid="ref52">52</xref>,<xref ref-type="bibr" rid="ref72">72</xref>,<xref ref-type="bibr" rid="ref80">80</xref>,<xref ref-type="bibr" rid="ref85">85</xref>,<xref ref-type="bibr" rid="ref112">112</xref>,<xref ref-type="bibr" rid="ref115">115</xref>,<xref ref-type="bibr" rid="ref122">122</xref>], features derived from a combination of their profile metadata and tweets [<xref ref-type="bibr" rid="ref52">52</xref>,<xref ref-type="bibr" rid="ref54">54</xref>,<xref ref-type="bibr" rid="ref76">76</xref>,<xref ref-type="bibr" rid="ref83">83</xref>,<xref ref-type="bibr" rid="ref93">93</xref>,<xref ref-type="bibr" rid="ref107">107</xref>,<xref ref-type="bibr" rid="ref108">108</xref>,<xref ref-type="bibr" rid="ref110">110</xref>,<xref ref-type="bibr" rid="ref117">117</xref>] or images [<xref ref-type="bibr" rid="ref52">52</xref>,<xref ref-type="bibr" 
rid="ref80">80</xref>,<xref ref-type="bibr" rid="ref108">108</xref>,<xref ref-type="bibr" rid="ref112">112</xref>, <xref ref-type="bibr" rid="ref116">116</xref>]. Of the 74 studies, 1 (1%) study from Japan included the user’s geographic information under the assumption that, culturally, a person of a certain demographic is more likely to frequent specific places [<xref ref-type="bibr" rid="ref89">89</xref>].</p>
            <p>Among the systems that used handcrafted features (25/44, 57%), most (13/25, 52%) achieved their best results using a support vector machine (SVM) [<xref ref-type="bibr" rid="ref49">49</xref>,<xref ref-type="bibr" rid="ref54">54</xref>,<xref ref-type="bibr" rid="ref65">65</xref>,<xref ref-type="bibr" rid="ref72">72</xref>,<xref ref-type="bibr" rid="ref82">82</xref>,<xref ref-type="bibr" rid="ref85">85</xref>,<xref ref-type="bibr" rid="ref86">86</xref>,<xref ref-type="bibr" rid="ref104">104</xref>-<xref ref-type="bibr" rid="ref106">106</xref>, <xref ref-type="bibr" rid="ref113">113</xref>,<xref ref-type="bibr" rid="ref116">116</xref>,<xref ref-type="bibr" rid="ref138">138</xref>], whereas others (12/25, 48%) used logistic regression [<xref ref-type="bibr" rid="ref87">87</xref>,<xref ref-type="bibr" rid="ref107">107</xref>,<xref ref-type="bibr" rid="ref110">110</xref>], naive Bayes [<xref ref-type="bibr" rid="ref51">51</xref>,<xref ref-type="bibr" rid="ref92">92</xref>], random forests [<xref ref-type="bibr" rid="ref80">80</xref>], bag of trees [<xref ref-type="bibr" rid="ref70">70</xref>], extreme gradient boosting [<xref ref-type="bibr" rid="ref89">89</xref>], or ensemble approaches [<xref ref-type="bibr" rid="ref76">76</xref>,<xref ref-type="bibr" rid="ref79">79</xref>,<xref ref-type="bibr" rid="ref107">107</xref>,<xref ref-type="bibr" rid="ref122">122</xref>] (details are provided in <xref ref-type="table" rid="table2">Table 2</xref>). 
Other systems used deep learning methods (15/44, 34%) such as DNNs, convolutional neural networks, feedforward neural networks, or recurrent neural networks [<xref ref-type="bibr" rid="ref55">55</xref>,<xref ref-type="bibr" rid="ref68">68</xref>,<xref ref-type="bibr" rid="ref71">71</xref>,<xref ref-type="bibr" rid="ref75">75</xref>,<xref ref-type="bibr" rid="ref93">93</xref>,<xref ref-type="bibr" rid="ref115">115</xref>,<xref ref-type="bibr" rid="ref121">121</xref>], bidirectional long short-term memory [<xref ref-type="bibr" rid="ref58">58</xref>], gated recurrent units [<xref ref-type="bibr" rid="ref57">57</xref>], graph recursive neural networks [<xref ref-type="bibr" rid="ref83">83</xref>], and multimodal deep learning networks [<xref ref-type="bibr" rid="ref108">108</xref>,<xref ref-type="bibr" rid="ref112">112</xref>].</p>
            <p>One of the studies created a meta-classifier ensemble classifying users based on the predictions of multiple individual classifiers [<xref ref-type="bibr" rid="ref117">117</xref>], including SVM, bidirectional encoder representations from transformers, and 2 existing models [<xref ref-type="bibr" rid="ref112">112</xref>,<xref ref-type="bibr" rid="ref139">139</xref>]. Another study created a DNN for learning with label proportion, a semisupervised approach [<xref ref-type="bibr" rid="ref52">52</xref>]. The results of the best-performing deep learning model as reported in each study are presented in <xref ref-type="table" rid="table3">Table 3</xref>. Studies that used lexical matching (4/44, 9%) of the user’s name to a curated name dictionary [<xref ref-type="bibr" rid="ref50">50</xref>,<xref ref-type="bibr" rid="ref81">81</xref>,<xref ref-type="bibr" rid="ref101">101</xref>,<xref ref-type="bibr" rid="ref102">102</xref>] to determine gender reported no validation or performance metrics.</p>
            <table-wrap position="float" id="table2">
              <label>Table 2</label>
              <caption>
                <p>Top reported system performance for studies predicting the gender of Twitter users using traditional machine learning (ML) methods. Result metrics are reflected in this table as reported in the original publications and are not necessarily comparable to each other.</p>
              </caption>
              <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
                <col width="240"/>
                <col width="290"/>
                <col width="310"/>
                <col width="80"/>
                <col width="80"/>
                <thead>
                  <tr valign="top">
                    <td>Study</td>
                    <td>Language</td>
                    <td>ML method</td>
                    <td colspan="2">Reported performance</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>
                      <break/>
                    </td>
                    <td>
                      <break/>
                    </td>
                    <td><italic>F</italic><sub>1</sub>-score</td>
                    <td>Accuracy</td>
                  </tr>
                </thead>
                <tbody>
                  <tr valign="top">
                    <td>Cesare et al [<xref ref-type="bibr" rid="ref122">122</xref>], 2017</td>
                    <td>English</td>
                    <td>Ensemble: lexical match and SVM<sup>a</sup> and DT<sup>b</sup></td>
                    <td>0.84</td>
                    <td>0.83</td>
                  </tr>
                  <tr valign="top">
                    <td>Jurgens et al [<xref ref-type="bibr" rid="ref80">80</xref>], 2017</td>
                    <td>English</td>
                    <td>RF<sup>c</sup> ensemble</td>
                    <td>0.78</td>
                    <td>0.80</td>
                  </tr>
                  <tr valign="top">
                    <td>Ljubešić et al [<xref ref-type="bibr" rid="ref85">85</xref>], 2017</td>
                    <td>Portuguese, French, Dutch, Spanish, German, and Italian</td>
                    <td>SVM</td>
                    <td>N/A<sup>d</sup></td>
                    <td>0.61-0.69</td>
                  </tr>
                  <tr valign="top">
                    <td>Markov et al [<xref ref-type="bibr" rid="ref87">87</xref>], 2016</td>
                    <td>English, Spanish, Dutch, and Italian</td>
                    <td>LogR<sup>e</sup></td>
                    <td>N/A</td>
                    <td>0.57-0.77</td>
                  </tr>
                  <tr valign="top">
                    <td>Mukherjee and Bala [<xref ref-type="bibr" rid="ref92">92</xref>], 2016</td>
                    <td>English</td>
                    <td>NB<sup>f</sup></td>
                    <td>0.75</td>
                    <td>0.71</td>
                  </tr>
                  <tr valign="top">
                    <td>Verhoeven et al [<xref ref-type="bibr" rid="ref106">106</xref>], 2017</td>
                    <td>Slovenian</td>
                    <td>SVM</td>
                    <td>0.93</td>
                    <td>0.93</td>
                  </tr>
                  <tr valign="top">
                    <td>Volkova [<xref ref-type="bibr" rid="ref110">110</xref>], 2015</td>
                    <td>English and Spanish</td>
                    <td>LogR</td>
                    <td>N/A</td>
                    <td>0.82</td>
                  </tr>
                  <tr valign="top">
                    <td>Xiang et al [<xref ref-type="bibr" rid="ref116">116</xref>], 2017</td>
                    <td>English</td>
                    <td>SVM and PME<sup>g</sup></td>
                    <td>N/A</td>
                    <td>0.76</td>
                  </tr>
                  <tr valign="top">
                    <td>Cheng et al [<xref ref-type="bibr" rid="ref65">65</xref>], 2018</td>
                    <td>English, Filipino, and Taglish</td>
                    <td>SVC<sup>h</sup> with lasso</td>
                    <td>0.84</td>
                    <td>0.84</td>
                  </tr>
                  <tr valign="top">
                    <td>Emmery et al [<xref ref-type="bibr" rid="ref69">69</xref>], 2017</td>
                    <td>English</td>
                    <td>fastText</td>
                    <td>N/A</td>
                    <td>0.76</td>
                  </tr>
                  <tr valign="top">
                    <td>Giannakopoulos et al [<xref ref-type="bibr" rid="ref72">72</xref>], 2018</td>
                    <td>N/A</td>
                    <td>SVM PNN<sup>i</sup></td>
                    <td>N/A</td>
                    <td>0.87</td>
                  </tr>
                  <tr valign="top">
                    <td>Khandelwal et al [<xref ref-type="bibr" rid="ref82">82</xref>], 2018</td>
                    <td>Code-mixed Hindi-English</td>
                    <td>SVM</td>
                    <td>N/A</td>
                    <td>0.9</td>
                  </tr>
                  <tr valign="top">
                    <td>Miura et al [<xref ref-type="bibr" rid="ref89">89</xref>], 2018</td>
                    <td>Japanese</td>
                    <td>XGBoost<sup>j</sup></td>
                    <td>N/A</td>
                    <td>0.89</td>
                  </tr>
                  <tr valign="top">
                    <td>van der Goot et al [<xref ref-type="bibr" rid="ref104">104</xref>], 2018</td>
                    <td>English, Dutch, French, Portuguese, and Spanish</td>
                    <td>SVM</td>
                    <td>N/A</td>
                    <td>0.66-0.72</td>
                  </tr>
                  <tr valign="top">
                    <td>Alessandra et al [<xref ref-type="bibr" rid="ref49">49</xref>], 2019</td>
                    <td>Italian</td>
                    <td>Ensemble: lexical match and SVM</td>
                    <td>N/A</td>
                    <td>0.75</td>
                  </tr>
                  <tr valign="top">
                    <td>Hirt et al [<xref ref-type="bibr" rid="ref76">76</xref>], 2019</td>
                    <td>German</td>
                    <td>Ensemble: binary classifiers</td>
                    <td>0.81</td>
                    <td>N/A</td>
                  </tr>
                  <tr valign="top">
                    <td>Hussein et al [<xref ref-type="bibr" rid="ref79">79</xref>], 2019</td>
                    <td>Dialect Egyptian Arabic</td>
                    <td>Ensemble: RF and LinR<sup>k</sup></td>
                    <td>N/A</td>
                    <td>0.77-0.88</td>
                  </tr>
                  <tr valign="top">
                    <td>Vicente et al [<xref ref-type="bibr" rid="ref107">107</xref>], 2018</td>
                    <td>English and Portuguese</td>
                    <td>Ensemble: Face++, LinR, and SVM</td>
                    <td>N/A</td>
                    <td>0.93-0.97</td>
                  </tr>
                  <tr valign="top">
                    <td>Arafat et al [<xref ref-type="bibr" rid="ref51">51</xref>], 2020</td>
                    <td>Indonesian</td>
                    <td>Multinomial NB</td>
                    <td>N/A</td>
                    <td>0.75</td>
                  </tr>
                  <tr valign="top">
                    <td>Baxevanakis et al [<xref ref-type="bibr" rid="ref54">54</xref>], 2020</td>
                    <td>Greek</td>
                    <td>SVM</td>
                    <td>N/A</td>
                    <td>0.7</td>
                  </tr>
                  <tr valign="top">
                    <td>Garcia-Guzman et al [<xref ref-type="bibr" rid="ref70">70</xref>], 2020</td>
                    <td>English</td>
                    <td>Bag of trees</td>
                    <td>0.64</td>
                    <td>0.64</td>
                  </tr>
                  <tr valign="top">
                    <td>López-Monroy et al [<xref ref-type="bibr" rid="ref86">86</xref>], 2020</td>
                    <td>English and Spanish</td>
                    <td>Bag of trees</td>
                    <td>0.64</td>
                    <td>0.64</td>
                  </tr>
                  <tr valign="top">
                    <td>Pizarro [<xref ref-type="bibr" rid="ref96">96</xref>], 2020</td>
                    <td>English and Spanish</td>
                    <td>SVM</td>
                    <td>0.82-0.84</td>
                    <td>N/A</td>
                  </tr>
                  <tr valign="top">
                    <td>Vashisth and Meehan [<xref ref-type="bibr" rid="ref105">105</xref>], 2020</td>
                    <td>English</td>
                    <td>LogR</td>
                    <td>N/A</td>
                    <td>0.57</td>
                  </tr>
                  <tr valign="top">
                    <td>Wong et al [<xref ref-type="bibr" rid="ref113">113</xref>], 2020</td>
                    <td>English</td>
                    <td>SVM</td>
                    <td>0.58-0.62</td>
                    <td>0.60</td>
                  </tr>
                </tbody>
              </table>
              <table-wrap-foot>
                <fn id="table2fn1">
                  <p><sup>a</sup>SVM: support vector machine.</p>
                </fn>
                <fn id="table2fn2">
                  <p><sup>b</sup>DT: decision tree.</p>
                </fn>
                <fn id="table2fn3">
                  <p><sup>c</sup>RF: random forest.</p>
                </fn>
                <fn id="table2fn4">
                  <p><sup>d</sup>N/A: not applicable.</p>
                </fn>
                <fn id="table2fn5">
                  <p><sup>e</sup>LogR: logistic regression.</p>
                </fn>
                <fn id="table2fn6">
                  <p><sup>f</sup>NB: naive Bayes.</p>
                </fn>
                <fn id="table2fn7">
                  <p><sup>g</sup>PME: projection matrix extraction.</p>
                </fn>
                <fn id="table2fn8">
                  <p><sup>h</sup>SVC: support vector classifier.</p>
                </fn>
                <fn id="table2fn9">
                  <p><sup>i</sup>PNN: probabilistic neural network.</p>
                </fn>
                <fn id="table2fn10">
                  <p><sup>j</sup>XGBoost: extreme gradient boosting.</p>
                </fn>
                <fn id="table2fn11">
                  <p><sup>k</sup>LinR: linear regression.</p>
                </fn>
              </table-wrap-foot>
            </table-wrap>
            <table-wrap position="float" id="table3">
              <label>Table 3</label>
              <caption>
                <p>Top reported system performance for studies predicting the gender of Twitter users using deep learning methods. ML: machine learning. Result metrics are reflected in this table as reported in the original publications and are not necessarily comparable to each other.</p>
              </caption>
              <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
                <col width="240"/>
                <col width="390"/>
                <col width="210"/>
                <col width="80"/>
                <col width="80"/>
                <thead>
                  <tr valign="top">
                    <td>Study</td>
                    <td>Language</td>
                    <td>ML method</td>
                    <td colspan="2">Reported performance</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>
                      <break/>
                    </td>
                    <td>
                      <break/>
                    </td>
                    <td><italic>F</italic><sub>1</sub>-score</td>
                    <td>Accuracy</td>
                  </tr>
                </thead>
                <tbody>
                  <tr valign="top">
                    <td>Ardehaly and Culotta [<xref ref-type="bibr" rid="ref52">52</xref>], 2017</td>
                    <td>English</td>
                    <td>Deep LLP<sup>a</sup></td>
                    <td>0.96</td>
                    <td>N/A<sup>b</sup></td>
                  </tr>
                  <tr valign="top">
                    <td>Geng et al [<xref ref-type="bibr" rid="ref71">71</xref>], 2017</td>
                    <td>English</td>
                    <td>Ensemble: LDA<sup>c</sup> and CNN<sup>d</sup></td>
                    <td>N/A</td>
                    <td>0.87</td>
                  </tr>
                  <tr valign="top">
                    <td>Kim et al [<xref ref-type="bibr" rid="ref83">83</xref>], 2017</td>
                    <td>English</td>
                    <td>GRNN<sup>e</sup></td>
                    <td>N/A</td>
                    <td>0.68</td>
                  </tr>
                  <tr valign="top">
                    <td>Vijayaraghavan et al [<xref ref-type="bibr" rid="ref108">108</xref>], 2017</td>
                    <td>English</td>
                    <td>DMT<sup>f</sup></td>
                    <td>0.89</td>
                    <td>N/A</td>
                  </tr>
                  <tr valign="top">
                    <td>Wang et al [<xref ref-type="bibr" rid="ref111">111</xref>], 2017</td>
                    <td>N/A</td>
                    <td>CNN</td>
                    <td>0.91</td>
                    <td>0.9</td>
                  </tr>
                  <tr valign="top">
                    <td>Bayot and Goncalves [<xref ref-type="bibr" rid="ref55">55</xref>], 2017</td>
                    <td>English and Spanish</td>
                    <td>CNN</td>
                    <td>N/A</td>
                    <td>0.59-0.72</td>
                  </tr>
                  <tr valign="top">
                    <td>Bsir and Zrigui [<xref ref-type="bibr" rid="ref57">57</xref>], 2018</td>
                    <td>Arabic</td>
                    <td>GRU<sup>g</sup></td>
                    <td>N/A</td>
                    <td>0.79</td>
                  </tr>
                  <tr valign="top">
                    <td>Wood-Doughty et al [<xref ref-type="bibr" rid="ref115">115</xref>], 2018</td>
                    <td>English</td>
                    <td>RNN<sup>h</sup></td>
                    <td>0.84</td>
                    <td>0.84</td>
                  </tr>
                  <tr valign="top">
                    <td>Bsir and Zrigui [<xref ref-type="bibr" rid="ref58">58</xref>], 2019</td>
                    <td>Arabic</td>
                    <td>BILSTM<sup>i</sup> with attention</td>
                    <td>N/A</td>
                    <td>0.82</td>
                  </tr>
                  <tr valign="top">
                    <td>Hashempour [<xref ref-type="bibr" rid="ref75">75</xref>], 2019</td>
                    <td>Portuguese, French, Dutch, Spanish, German, and Italian</td>
                    <td>FFNN<sup>j</sup></td>
                    <td>N/A</td>
                    <td>0.84-0.86</td>
                  </tr>
                  <tr valign="top">
                    <td>Wang et al [<xref ref-type="bibr" rid="ref112">112</xref>], 2019</td>
                    <td>Multilingual</td>
                    <td>mmDNN<sup>k</sup></td>
                    <td>0.92</td>
                    <td>N/A</td>
                  </tr>
                  <tr valign="top">
                    <td>ElSayed and Farouk [<xref ref-type="bibr" rid="ref68">68</xref>], 2020</td>
                    <td>Egyptian and Arabic dialects</td>
                    <td>Multichannel CNN-biGRU<sup>l</sup></td>
                    <td>N/A</td>
                    <td>0.84-0.91</td>
                  </tr>
                  <tr valign="top">
                    <td>Imuede et al [<xref ref-type="bibr" rid="ref93">93</xref>], 2020</td>
                    <td>English</td>
                    <td>DNN<sup>m</sup></td>
                    <td>N/A</td>
                    <td>0.68</td>
                  </tr>
                  <tr valign="top">
                    <td>Zhao et al [<xref ref-type="bibr" rid="ref121">121</xref>], 2020</td>
                    <td>English</td>
                    <td>CNN</td>
                    <td>0.80</td>
                    <td>N/A</td>
                  </tr>
                  <tr valign="top">
                    <td>Yang et al [<xref ref-type="bibr" rid="ref117">117</xref>], 2021</td>
                    <td>English</td>
                    <td>Ensemble: M3<sup>n</sup> and SVM<sup>o</sup></td>
                    <td>0.95</td>
                    <td>0.94</td>
                  </tr>
                </tbody>
              </table>
              <table-wrap-foot>
                <fn id="table3fn1">
                  <p><sup>a</sup>LLP: learning with label proportions.</p>
                </fn>
                <fn id="table3fn2">
                  <p><sup>b</sup>N/A: not applicable.</p>
                </fn>
                <fn id="table3fn3">
                  <p><sup>c</sup>LDA: latent Dirichlet allocation.</p>
                </fn>
                <fn id="table3fn4">
                  <p><sup>d</sup>CNN: convolutional neural network.</p>
                </fn>
                <fn id="table3fn5">
                  <p><sup>e</sup>GRNN: graph recursive neural network.</p>
                </fn>
                <fn id="table3fn6">
                  <p><sup>f</sup>DMT: deep multimodal multitask.</p>
                </fn>
                <fn id="table3fn7">
                  <p><sup>g</sup>GRU: gated recurrent unit.</p>
                </fn>
                <fn id="table3fn8">
                  <p><sup>h</sup>RNN: recurrent neural network.</p>
                </fn>
                <fn id="table3fn9">
                  <p><sup>i</sup>BILSTM: bidirectional long short-term memory.</p>
                </fn>
                <fn id="table3fn10">
                  <p><sup>j</sup>FFNN: feed forward neural network.</p>
                </fn>
                <fn id="table3fn11">
                  <p><sup>k</sup>mmDNN: multimodal deep neural network.</p>
                </fn>
                <fn id="table3fn12">
                  <p><sup>l</sup>biGRU: bidirectional gated recurrent unit.</p>
                </fn>
                <fn id="table3fn13">
                  <p><sup>m</sup>DNN: deep neural network.</p>
                </fn>
                <fn id="table3fn14">
                  <p><sup>n</sup>M3: multimodal, multilingual, and multi-attribute system.</p>
                </fn>
                <fn id="table3fn15">
                  <p><sup>o</sup>SVM: support vector machine.</p>
                </fn>
              </table-wrap-foot>
            </table-wrap>
          </sec>
          <sec>
            <title>Performance</title>
            <p>Performance results from the traditional ML methods cannot be directly compared against the deep learning methods used, as they were evaluated against different gold-standard corpora, and they used nonstandardized reporting metrics. However, looking at the overall results in terms of <italic>F</italic><sub>1</sub>-score, the results of the studies using deep learning had a relatively narrower range of reported performance (0.84-0.96), with a higher minimum of 0.84 and higher maximum of 0.96, compared with the reported performance range for traditional ML methods, which spans from 0.64 to 0.93.</p>
          </sec>
        </sec>
        <sec>
          <title>Age</title>
          <sec>
            <title>Overview</title>
            <p>We found 19 studies that developed ad-hoc methods to predict the Twitter user’s age, among which 7 (37%) predicted age exclusively [<xref ref-type="bibr" rid="ref53">53</xref>,<xref ref-type="bibr" rid="ref64">64</xref>,<xref ref-type="bibr" rid="ref66">66</xref>,<xref ref-type="bibr" rid="ref73">73</xref>,<xref ref-type="bibr" rid="ref90">90</xref>,<xref ref-type="bibr" rid="ref94">94</xref>,<xref ref-type="bibr" rid="ref95">95</xref>]. All but 1 (5%) of the studies [<xref ref-type="bibr" rid="ref80">80</xref>] approached the detection of Twitter users’ age as an automatic classification of predefined age groups. The number of age groups varied across the studies (<xref ref-type="table" rid="table4">Tables 4</xref> and <xref ref-type="table" rid="table5">5</xref>), with the ages categorized into 2 [<xref ref-type="bibr" rid="ref53">53</xref>,<xref ref-type="bibr" rid="ref73">73</xref>,<xref ref-type="bibr" rid="ref83">83</xref>,<xref ref-type="bibr" rid="ref110">110</xref>,<xref ref-type="bibr" rid="ref116">116</xref>], 3 [<xref ref-type="bibr" rid="ref51">51</xref>,<xref ref-type="bibr" rid="ref66">66</xref>,<xref ref-type="bibr" rid="ref90">90</xref>,<xref ref-type="bibr" rid="ref94">94</xref>,<xref ref-type="bibr" rid="ref95">95</xref>,<xref ref-type="bibr" rid="ref101">101</xref>,<xref ref-type="bibr" rid="ref108">108</xref>], 4 [<xref ref-type="bibr" rid="ref70">70</xref>,<xref ref-type="bibr" rid="ref112">112</xref>], or more [<xref ref-type="bibr" rid="ref55">55</xref>,<xref ref-type="bibr" rid="ref64">64</xref>,<xref ref-type="bibr" rid="ref65">65</xref>,<xref ref-type="bibr" rid="ref87">87</xref>] groups. 
The range of ages within the groups also varied across the studies; for example, across the 5 studies that took a binary classification approach, Guimaraes et al [<xref ref-type="bibr" rid="ref73">73</xref>] used 13 to 19 years and ≥20 years as the 2 age groups, Volkova et al [<xref ref-type="bibr" rid="ref110">110</xref>] and Kim et al [<xref ref-type="bibr" rid="ref83">83</xref>] used 18 to 23 years or ≥25 years, Xiang et al [<xref ref-type="bibr" rid="ref116">116</xref>] used ≤30 years or &#62;30 years, and Ardehaly and Culotta [<xref ref-type="bibr" rid="ref53">53</xref>] used &#60;25 years and ≥25 years.</p>
            <p>Except for 2 (11%) studies that did not report the language of the tweets used [<xref ref-type="bibr" rid="ref51">51</xref>,<xref ref-type="bibr" rid="ref73">73</xref>], all studies used English language tweets. A total of 8 (42%) studies extended their systems to include additional languages, including Spanish [<xref ref-type="bibr" rid="ref55">55</xref>,<xref ref-type="bibr" rid="ref64">64</xref>,<xref ref-type="bibr" rid="ref87">87</xref>,<xref ref-type="bibr" rid="ref110">110</xref>], Dutch [<xref ref-type="bibr" rid="ref87">87</xref>,<xref ref-type="bibr" rid="ref94">94</xref>,<xref ref-type="bibr" rid="ref95">95</xref>], Filipino [<xref ref-type="bibr" rid="ref65">65</xref>], and multiple languages [<xref ref-type="bibr" rid="ref112">112</xref>].</p>
          </sec>
          <sec>
            <title>Data Sets</title>
            <p>Most studies (9/19, 47%) that developed new algorithms prepared new data sets to evaluate them with data retrieved directly using Twitter’s API [<xref ref-type="bibr" rid="ref51">51</xref>,<xref ref-type="bibr" rid="ref53">53</xref>,<xref ref-type="bibr" rid="ref66">66</xref>,<xref ref-type="bibr" rid="ref73">73</xref>,<xref ref-type="bibr" rid="ref90">90</xref>,<xref ref-type="bibr" rid="ref108">108</xref>] or used other sources of data for this purpose [<xref ref-type="bibr" rid="ref64">64</xref>,<xref ref-type="bibr" rid="ref80">80</xref>,<xref ref-type="bibr" rid="ref112">112</xref>] (<xref ref-type="supplementary-material" rid="app4">Multimedia Appendix 4</xref>). Several studies used data sets made available by other studies to train or evaluate their algorithms: among the 19 studies, 2 (11%) studies [<xref ref-type="bibr" rid="ref94">94</xref>,<xref ref-type="bibr" rid="ref95">95</xref>] combined data sets from Sloan et al [<xref ref-type="bibr" rid="ref34">34</xref>], Nguyen et al [<xref ref-type="bibr" rid="ref36">36</xref>], and Morgan-Lopez et al [<xref ref-type="bibr" rid="ref90">90</xref>]; Kim et al [<xref ref-type="bibr" rid="ref83">83</xref>] used the data set from Volkova et al [<xref ref-type="bibr" rid="ref140">140</xref>]; and 3 (16%) studies [<xref ref-type="bibr" rid="ref55">55</xref>,<xref ref-type="bibr" rid="ref70">70</xref>,<xref ref-type="bibr" rid="ref87">87</xref>] used data sets that were created for the PAN-CLEF author profiling shared tasks [<xref ref-type="bibr" rid="ref124">124</xref>-<xref ref-type="bibr" rid="ref126">126</xref>]. 
The studies that prepared new data sets (<xref ref-type="supplementary-material" rid="app6">Multimedia Appendix 6</xref>) labeled users’ age groups by automatically or semiautomatically searching (1) for tweets that self-reported birthday announcements or age [<xref ref-type="bibr" rid="ref53">53</xref>,<xref ref-type="bibr" rid="ref80">80</xref>,<xref ref-type="bibr" rid="ref90">90</xref>,<xref ref-type="bibr" rid="ref108">108</xref>,<xref ref-type="bibr" rid="ref110">110</xref>,<xref ref-type="bibr" rid="ref112">112</xref>], (2) for tweets in which a user was wished a happy birthday [<xref ref-type="bibr" rid="ref90">90</xref>], (3) for profiles that self-reported age [<xref ref-type="bibr" rid="ref64">64</xref>,<xref ref-type="bibr" rid="ref66">66</xref>,<xref ref-type="bibr" rid="ref108">108</xref>,<xref ref-type="bibr" rid="ref112">112</xref>], (4) for profiles that mentioned age-related keywords (eg, <italic>grandparent</italic>) [<xref ref-type="bibr" rid="ref66">66</xref>,<xref ref-type="bibr" rid="ref112">112</xref>], or (5) for manual annotation based on images or profile metadata [<xref ref-type="bibr" rid="ref112">112</xref>,<xref ref-type="bibr" rid="ref116">116</xref>,<xref ref-type="bibr" rid="ref140">140</xref>] or (6) by subjectively perceiving age groups based on the content of individual tweets [<xref ref-type="bibr" rid="ref73">73</xref>]. In 1 (5%) study [<xref ref-type="bibr" rid="ref51">51</xref>], a mixture of self-reported information and demographic information of known individuals was used to label the data. Similar to studies on gender, the reported availability of the corpora was scarce. Only 5 (26%) studies reported that their data sets were available, 2 (40%) by request, 1 (20%) provided a link to the whole data set, and 2 (40%) provided a link to a sample of the corpus (<xref ref-type="supplementary-material" rid="app6">Multimedia Appendix 6</xref>).</p>
          </sec>
          <sec>
            <title>Features and Models</title>
            <p>The studies used labeled age groups to evaluate classification models based on the features of the users’ profile metadata (eg, user names, bio, followers, and users followed) [<xref ref-type="bibr" rid="ref51">51</xref>,<xref ref-type="bibr" rid="ref53">53</xref>,<xref ref-type="bibr" rid="ref64">64</xref>,<xref ref-type="bibr" rid="ref80">80</xref>,<xref ref-type="bibr" rid="ref112">112</xref>], a combination of their profile metadata and tweets (eg, n-grams, word embeddings, hashtags, and URLs) [<xref ref-type="bibr" rid="ref73">73</xref>,<xref ref-type="bibr" rid="ref83">83</xref>,<xref ref-type="bibr" rid="ref90">90</xref>,<xref ref-type="bibr" rid="ref94">94</xref>,<xref ref-type="bibr" rid="ref95">95</xref>,<xref ref-type="bibr" rid="ref108">108</xref>,<xref ref-type="bibr" rid="ref110">110</xref>], tweet texts only [<xref ref-type="bibr" rid="ref65">65</xref>,<xref ref-type="bibr" rid="ref66">66</xref>,<xref ref-type="bibr" rid="ref70">70</xref>,<xref ref-type="bibr" rid="ref87">87</xref>], or images [<xref ref-type="bibr" rid="ref80">80</xref>,<xref ref-type="bibr" rid="ref108">108</xref>,<xref ref-type="bibr" rid="ref112">112</xref>,<xref ref-type="bibr" rid="ref116">116</xref>].</p>
            <p>For automatic classification, most studies (12/19, 63%) used traditional supervised ML methods, including logistic regression [<xref ref-type="bibr" rid="ref51">51</xref>,<xref ref-type="bibr" rid="ref66">66</xref>,<xref ref-type="bibr" rid="ref87">87</xref>,<xref ref-type="bibr" rid="ref90">90</xref>,<xref ref-type="bibr" rid="ref110">110</xref>], Bayesian probabilistic inference [<xref ref-type="bibr" rid="ref64">64</xref>], random forests [<xref ref-type="bibr" rid="ref80">80</xref>], bag of trees [<xref ref-type="bibr" rid="ref70">70</xref>], or SVM [<xref ref-type="bibr" rid="ref65">65</xref>,<xref ref-type="bibr" rid="ref116">116</xref>], or a semisupervised approach, learning from label proportion [<xref ref-type="bibr" rid="ref53">53</xref>]. Other studies (7/19, 37%) used deep learning methods such as convolutional neural networks [<xref ref-type="bibr" rid="ref55">55</xref>,<xref ref-type="bibr" rid="ref73">73</xref>,<xref ref-type="bibr" rid="ref94">94</xref>,<xref ref-type="bibr" rid="ref95">95</xref>], graph recursive neural networks [<xref ref-type="bibr" rid="ref83">83</xref>], and multimodal deep learning networks [<xref ref-type="bibr" rid="ref108">108</xref>,<xref ref-type="bibr" rid="ref112">112</xref>]. The best-performing systems for each study are listed in <xref ref-type="table" rid="table4">Tables 4</xref> and <xref ref-type="table" rid="table5">5</xref>. Of the 19 studies, 1 (5%) [<xref ref-type="bibr" rid="ref101">101</xref>] classified age based on a previously developed age lexicon and did not report any performance metrics.</p>
            <table-wrap position="float" id="table4">
              <label>Table 4</label>
              <caption>
                <p>Top reported system performance for studies predicting the age of Twitter users using traditional machine learning (ML) methods. Result metrics are reflected in this table as reported in the original publications and are not directly comparable to each other. Studies are ordered by the number of classification groups.</p>
              </caption>
              <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
                <col width="230"/>
                <col width="100"/>
                <col width="210"/>
                <col width="180"/>
                <col width="120"/>
                <col width="80"/>
                <col width="80"/>
                <thead>
                  <tr valign="top">
                    <td>Study</td>
                    <td>Number of age groups</td>
                    <td>Age class detail (y)</td>
                    <td>Language</td>
                    <td>ML method</td>
                    <td colspan="2">Reported performance</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>
                      <break/>
                    </td>
                    <td>
                      <break/>
                    </td>
                    <td>
                      <break/>
                    </td>
                    <td>
                      <break/>
                    </td>
                    <td><italic>F</italic><sub>1</sub>-score</td>
                    <td>Accuracy</td>
                  </tr>
                </thead>
                <tbody>
                  <tr valign="top">
                    <td>Jurgens et al [<xref ref-type="bibr" rid="ref80">80</xref>], 2017</td>
                    <td>N/A<sup>a</sup></td>
                    <td>Continuous</td>
                    <td>English</td>
                    <td>RF<sup>b</sup> regression</td>
                    <td>N/A</td>
                    <td>0.71</td>
                  </tr>
                  <tr valign="top">
                    <td>Volkova [<xref ref-type="bibr" rid="ref110">110</xref>], 2017</td>
                    <td>2</td>
                    <td>18-23 and 25-30</td>
                    <td>English and Spanish</td>
                    <td>LogR<sup>c</sup></td>
                    <td>N/A</td>
                    <td>0.77</td>
                  </tr>
                  <tr valign="top">
                    <td>Xiang et al [<xref ref-type="bibr" rid="ref116">116</xref>], 2017</td>
                    <td>2</td>
                    <td>≤30 and &#62;30</td>
                    <td>English</td>
                    <td>CPME<sup>d</sup></td>
                    <td>N/A</td>
                    <td>0.74</td>
                  </tr>
                  <tr valign="top">
                    <td>Ardehaly and Culotta [<xref ref-type="bibr" rid="ref53">53</xref>], 2018</td>
                    <td>2</td>
                    <td>&#60;25 and &#62;25</td>
                    <td>English</td>
                    <td>LLP<sup>e</sup></td>
                    <td>N/A</td>
                    <td>0.78</td>
                  </tr>
                  <tr valign="top">
                    <td>Morgan-Lopez et al [<xref ref-type="bibr" rid="ref90">90</xref>], 2017</td>
                    <td>3</td>
                    <td>13-17, 18-24, and &#62;24</td>
                    <td>English</td>
                    <td>LogR</td>
                    <td>0.74</td>
                    <td>N/A</td>
                  </tr>
                  <tr valign="top">
                    <td>Arafat et al [<xref ref-type="bibr" rid="ref51">51</xref>], 2020</td>
                    <td>3</td>
                    <td>≤24, 25-39, and ≥40</td>
                    <td>NR<sup>f</sup></td>
                    <td>LogR</td>
                    <td>N/A</td>
                    <td>0.71</td>
                  </tr>
                  <tr valign="top">
                    <td>Cornelisse and Pillai [<xref ref-type="bibr" rid="ref66">66</xref>], 2020</td>
                    <td>3</td>
                    <td>18-24, 25-54, and &#62;55</td>
                    <td>English</td>
                    <td>LogR</td>
                    <td>0.78</td>
                    <td>N/A</td>
                  </tr>
                  <tr valign="top">
                    <td>Markov et al [<xref ref-type="bibr" rid="ref87">87</xref>], 2017</td>
                    <td>5</td>
                    <td>18-24, 25-34, 35-49, 50-64, and &#62;65</td>
                    <td>English, Spanish, Dutch, and Italian</td>
                    <td>LogR</td>
                    <td>N/A</td>
                    <td>0.56-0.65</td>
                  </tr>
                  <tr valign="top">
                    <td>Cheng et al [<xref ref-type="bibr" rid="ref65">65</xref>], 2018</td>
                    <td>5</td>
                    <td>18-24, 25-34, 35-44, 45-54, and 55-64</td>
                    <td>English, Filipino, and Taglish</td>
                    <td>SVC<sup>g</sup></td>
                    <td>0.61</td>
                    <td>0.86</td>
                  </tr>
                  <tr valign="top">
                    <td>Garcia-Guzman et al [<xref ref-type="bibr" rid="ref70">70</xref>], 2020</td>
                    <td>4</td>
                    <td>18-24, 25-34, 35-49, and &#62;50</td>
                    <td>English</td>
                    <td>Bag of trees</td>
                    <td>N/A</td>
                    <td>0.67</td>
                  </tr>
                  <tr valign="top">
                    <td>Chamberlain et al [<xref ref-type="bibr" rid="ref64">64</xref>], 2017</td>
                    <td>10 (3 subgroups)</td>
                    <td>&#60;12, 12-13, 14-15, 16-17, 18-24, 25-34, 35-44, 45-54, 55-64, and &#62;64</td>
                    <td>English, Spanish, French, and Portuguese</td>
                    <td>Bayesian probability</td>
                    <td>0.31-0.86 (3 class)</td>
                    <td>N/A</td>
                  </tr>
                </tbody>
              </table>
              <table-wrap-foot>
                <fn id="table4fn1">
                  <p><sup>a</sup>N/A: not applicable.</p>
                </fn>
                <fn id="table4fn2">
                  <p><sup>b</sup>RF: random forest.</p>
                </fn>
                <fn id="table4fn3">
                  <p><sup>c</sup>LogR: logistic regression.</p>
                </fn>
                <fn id="table4fn4">
                  <p><sup>d</sup>CPME: coupled projection matrix extraction.</p>
                </fn>
                <fn id="table4fn5">
                  <p><sup>e</sup>LLP: learning with label proportions.</p>
                </fn>
                <fn id="table4fn6">
                  <p><sup>f</sup>NR: not reported.</p>
                </fn>
                <fn id="table4fn7">
                  <p><sup>g</sup>SVC: support vector classifier.</p>
                </fn>
              </table-wrap-foot>
            </table-wrap>
            <table-wrap position="float" id="table5">
              <label>Table 5</label>
              <caption>
                <p>Top reported system performance for studies predicting the age of Twitter users using deep learning machine learning (ML) methods. Result metrics are reflected in this table as reported in the original publications and are not comparable to each other. Studies are ordered by the number of classification groups.</p>
              </caption>
              <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
                <col width="230"/>
                <col width="100"/>
                <col width="250"/>
                <col width="160"/>
                <col width="100"/>
                <col width="80"/>
                <col width="80"/>
                <thead>
                  <tr valign="top">
                    <td>Study</td>
                    <td>Number of age groups</td>
                    <td>Age class detail (y)</td>
                    <td>Language</td>
                    <td>ML method</td>
                    <td colspan="2">Reported performance</td>
                  </tr>
                  <tr valign="top">
                    <td>
                      <break/>
                    </td>
                    <td>
                      <break/>
                    </td>
                    <td>
                      <break/>
                    </td>
                    <td>
                      <break/>
                    </td>
                    <td>
                      <break/>
                    </td>
                    <td><italic>F</italic><sub>1</sub>-score</td>
                    <td>Accuracy</td>
                  </tr>
                </thead>
                <tbody>
                  <tr valign="top">
                    <td>Guimaraes et al [<xref ref-type="bibr" rid="ref73">73</xref>], 2017</td>
                    <td>2</td>
                    <td>13-19 and &#62;20</td>
                    <td>English</td>
                    <td>CNN<sup>a</sup></td>
                    <td>0.94</td>
                    <td>N/A<sup>b</sup></td>
                  </tr>
                  <tr valign="top">
                    <td>Kim et al [<xref ref-type="bibr" rid="ref83">83</xref>], 2017</td>
                    <td>2</td>
                    <td>Young (18-23) and old (25-30)</td>
                    <td>English</td>
                    <td>GRNN<sup>c</sup></td>
                    <td>N/A</td>
                    <td>0.81</td>
                  </tr>
                  <tr valign="top">
                    <td>Vijayaraghavan et al [<xref ref-type="bibr" rid="ref108">108</xref>], 2017</td>
                    <td>3</td>
                    <td>&#60;30, 30-60, and &#62;60</td>
                    <td>English</td>
                    <td>DMT<sup>d</sup></td>
                    <td>0.82</td>
                    <td>N/A</td>
                  </tr>
                  <tr valign="top">
                    <td>Pandya et al [<xref ref-type="bibr" rid="ref94">94</xref>], 2018</td>
                    <td>3</td>
                    <td>Dutch: &#60;20, 20-40, and &#62;40; English 1: 13-17, 18-40, and &#62;40; and English 2: 13-17, 18-24, and &#62;25</td>
                    <td>English and Dutch</td>
                    <td>CNN</td>
                    <td>0.61-0.87</td>
                    <td>N/A</td>
                  </tr>
                  <tr valign="top">
                    <td>Pandya et al [<xref ref-type="bibr" rid="ref95">95</xref>], 2020</td>
                    <td>3</td>
                    <td>Dutch: &#60;20, 20-40, and &#62;40; English 1: 13-17, 18-40, and &#62;40; and English 2: 13-17, 18-24, and &#62;25</td>
                    <td>English and Dutch</td>
                    <td>CNN</td>
                    <td>0.82-0.87</td>
                    <td>N/A</td>
                  </tr>
                  <tr valign="top">
                    <td>Wang et al [<xref ref-type="bibr" rid="ref112">112</xref>], 2019</td>
                    <td>4</td>
                    <td>≤18, 18-30, 30-40, and 40-99</td>
                    <td>Multilingual—28</td>
                    <td>mmDNN<sup>e</sup></td>
                    <td>0.52</td>
                    <td>N/A</td>
                  </tr>
                  <tr valign="top">
                    <td>Bayot and Goncalves [<xref ref-type="bibr" rid="ref55">55</xref>], 2017</td>
                    <td>5</td>
                    <td>18-24, 25-34, 35-49, 50-64, and ≥65</td>
                    <td>English and Spanish</td>
                    <td>CNN</td>
                    <td>N/A</td>
                    <td>0.43-0.55</td>
                  </tr>
                </tbody>
              </table>
              <table-wrap-foot>
                <fn id="table5fn1">
                  <p><sup>a</sup>CNN: convolutional neural network.</p>
                </fn>
                <fn id="table5fn2">
                  <p><sup>b</sup>N/A: not applicable.</p>
                </fn>
                <fn id="table5fn3">
                  <p><sup>c</sup>GRNN: graph recursive neural network.</p>
                </fn>
                <fn id="table5fn4">
                  <p><sup>d</sup>DMT: deep multimodal multitask.</p>
                </fn>
                <fn id="table5fn5">
                  <p><sup>e</sup>mmDNN: multimodal deep neural network.</p>
                </fn>
              </table-wrap-foot>
            </table-wrap>
          </sec>
          <sec>
            <title>Performance</title>
            <p>Assessing the performance differences between studies using traditional ML methods and those using deep learning or neural networks is challenging owing to variations in classification criteria (eg, different age groupings and different number of classification categories) and the variety of performance metrics reported. However, for both methods, higher performance was noted when the problem was framed as a binary or ternary classification than as a larger multinomial classification.</p>
          </sec>
        </sec>
      </sec>
      <sec>
        <title>Studies Using Previously Developed Methods</title>
        <sec>
          <title>Overview</title>
          <p>Among the 74 included studies, there were 23 (31%) studies in which the detection of gender or age was secondary to their research, and previously developed methods were used to detect the demographic information of their cohort. Of the 23 studies, 13 (57%) used open-source models, and 10 (43%) used off-the-shelf software. More details about each study are given in the subsequent sections.</p>
        </sec>
        <sec>
          <title>Open-Source Models</title>
          <p>Of the 13 studies that used open-source models, 3 (4%) [<xref ref-type="bibr" rid="ref74">74</xref>,<xref ref-type="bibr" rid="ref99">99</xref>,<xref ref-type="bibr" rid="ref100">100</xref>] drew upon an extant model [<xref ref-type="bibr" rid="ref141">141</xref>] that used a predictive lexicon for the multiclass classification of age or gender for their applications. None of these studies created a validation corpus to assess the performance of the system, which was originally reported as 89.9% accuracy for gender and 0.84 Pearson correlation coefficient for age. One (1%) study [<xref ref-type="bibr" rid="ref118">118</xref>] used the same text-based model [<xref ref-type="bibr" rid="ref141">141</xref>] and an image model [<xref ref-type="bibr" rid="ref142">142</xref>] to determine the age and gender of their cohort. When tested against their gold-standard corpus of self-reports from profile descriptions, they found that the imaging model performed best for gender (accuracy=90%-92%), whereas textual features gave the best results for age (accuracy=60%). A total of 3 (4%) studies [<xref ref-type="bibr" rid="ref78">78</xref>,<xref ref-type="bibr" rid="ref91">91</xref>,<xref ref-type="bibr" rid="ref114">114</xref>] used demographer [<xref ref-type="bibr" rid="ref115">115</xref>,<xref ref-type="bibr" rid="ref139">139</xref>,<xref ref-type="bibr" rid="ref143">143</xref>] for gender predictions, with 1 (33%) study [<xref ref-type="bibr" rid="ref91">91</xref>] evaluating the performance against a set of users who had self-reported their gender in a survey, finding an <italic>F</italic><sub>1</sub>-score of 0.869 for women and 0.770 for men. A total of 2 (3%) studies [<xref ref-type="bibr" rid="ref61">61</xref>,<xref ref-type="bibr" rid="ref62">62</xref>] used an ensemble classifier of previously developed models, with a reported accuracy of 0.83 and an <italic>F</italic><sub>1</sub>-score of 0.83 [<xref ref-type="bibr" rid="ref122">122</xref>]. 
Two (3%) other studies [<xref ref-type="bibr" rid="ref67">67</xref>,<xref ref-type="bibr" rid="ref120">120</xref>] used M3 [<xref ref-type="bibr" rid="ref112">112</xref>] to detect gender and age, with 1 (50%) study validating the performance using a manually labeled data set, finding an accuracy of 95.9% and an <italic>F</italic><sub>1</sub>-score of 0.957 for gender and an accuracy of 77.6% and an <italic>F</italic><sub>1</sub>-score of 0.731 for age. One (1%) study [<xref ref-type="bibr" rid="ref56">56</xref>] used Deep EXpectation of apparent age [<xref ref-type="bibr" rid="ref144">144</xref>] for age and gender detection, which reported a validation error of 3.96 years for age and an 88% accuracy for gender. One (1%) study [<xref ref-type="bibr" rid="ref98">98</xref>] used the rOpenSci <italic>gender</italic> package, and no assessment of performance was reported.</p>
        </sec>
        <sec>
          <title>Off-the-Shelf Software</title>
          <p>In the 10 studies that used off-the-shelf software, Face++ was the most common software, being used in 6 (60%) studies [<xref ref-type="bibr" rid="ref63">63</xref>,<xref ref-type="bibr" rid="ref77">77</xref>,<xref ref-type="bibr" rid="ref88">88</xref>,<xref ref-type="bibr" rid="ref97">97</xref>,<xref ref-type="bibr" rid="ref109">109</xref>,<xref ref-type="bibr" rid="ref119">119</xref>]. The remaining studies used DemographicsPro [<xref ref-type="bibr" rid="ref59">59</xref>,<xref ref-type="bibr" rid="ref60">60</xref>], Microsoft Face API [<xref ref-type="bibr" rid="ref84">84</xref>], and RapidMiner [<xref ref-type="bibr" rid="ref103">103</xref>].</p>
          <p>In 4 (40%) [<xref ref-type="bibr" rid="ref88">88</xref>,<xref ref-type="bibr" rid="ref97">97</xref>,<xref ref-type="bibr" rid="ref103">103</xref>,<xref ref-type="bibr" rid="ref109">109</xref>] of the 10 studies, no validation of performance was carried out, and a further 2 (20%) studies simply reported that DemographicsPro <italic>requires</italic> 95% confidence to make an estimation [<xref ref-type="bibr" rid="ref59">59</xref>,<xref ref-type="bibr" rid="ref60">60</xref>]. Other studies (n=4, 40%) compared the predictions with manual annotation and identified an accuracy for age using Face++ of 82.8% [<xref ref-type="bibr" rid="ref77">77</xref>], or of 68% for strict age groups and 83% if the age groupings were relaxed [<xref ref-type="bibr" rid="ref63">63</xref>]. The performance for age using Microsoft Face API was measured at a Gwet agreement coefficient (AC) of 0.895 [<xref ref-type="bibr" rid="ref84">84</xref>] when compared with manually labeled data sets.</p>
          <p>For gender, the studies (2/10, 20%) that measured performance against their own gold-standard labeled set of users recorded accuracies of 94.4% [<xref ref-type="bibr" rid="ref77">77</xref>] or 88% [<xref ref-type="bibr" rid="ref63">63</xref>] using Face++. Other studies (3/10, 30%) [<xref ref-type="bibr" rid="ref88">88</xref>,<xref ref-type="bibr" rid="ref97">97</xref>,<xref ref-type="bibr" rid="ref109">109</xref>] reported a confidence level of 95% +0.015 or –0.015 using Face++ for gender prediction.</p>
          <p>Only 1 (10%) [<xref ref-type="bibr" rid="ref119">119</xref>] of the 10 studies went beyond manual annotation to create a gold standard and used multiple search techniques to manually verify age and gender, including LinkedIn profiles, electoral roll listings, personal websites, Twitter descriptions, and Twitter profile images. In this study, Face++ accuracy for age was reported as 40.4%, and Face++ accuracy for gender was reported as 44.8% (with a valid image accuracy of 32.5% for age and 87.7% for gender), and crowdsourcing annotation accuracy for age was 60.8% and for gender was 86.4% (with valid image accuracy of 56.1% for age and 93.9% for gender).</p>
        </sec>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <sec>
          <title>Overview</title>
          <p>In this review, we aimed to provide an overview of recent ML methods used to predict the gender and age of Twitter users, as these are key demographics for epidemiology. Our review indicates that both tasks have been popular, but the identification of gender has received more attention than the identification of age. However, no de facto standards for research (ie, data collection and evaluation) have emerged, resulting in a large number of heterogeneous studies that are not directly comparable. Thus, it is not straightforward to conclude where the state-of-the-art stands for these tasks.</p>
          <p>Our review found evidence of potential bias that impacts the quality and representativeness of the data used in the studies. One prevalent source of bias lies in the data collection and labeling processes. For instance, some studies may introduce systemic bias through the use of imprecise labeling methods such as name matching for labeling Twitter users’ gender. This approach can lead to mislabeling, especially for individuals with names that are culturally diverse or androgynous, and can introduce inaccuracies into the training data. Another problem is the introduction of sampling bias through the use of artificially balanced data sets, creating an unrepresentative sample of the Twitter population, which, in reality, has a skewed distribution, with certain age and gender groups being more prevalent than others.</p>
          <p>It is important to address and limit these biases because when ML models are trained on biased data, they tend to replicate and amplify these biases in their predictions [<xref ref-type="bibr" rid="ref145">145</xref>].</p>
          <p>The prediction of demographic information is an important task to address to fully realize the potential advantages of using social media data, such as those of using Twitter data in health-related research. In the United States, the National Institutes of Health has committed to including women participants in clinical studies and including sex as a biological variable, finding that the disaggregation of data by sex will allow for sex-based comparisons of results to identify any sex-based differences. A recent review [<xref ref-type="bibr" rid="ref146">146</xref>] found that this disaggregation in the development of ML models led to the discovery of sex-based differences that improved the model performance for sex-specific cohorts. Age is also important, as it can correlate with and be a factor in the course and progression of disease [<xref ref-type="bibr" rid="ref146">146</xref>] or the effects of medication [<xref ref-type="bibr" rid="ref147">147</xref>]. Given the significance of this information, accurate and reproducible models must be developed. One way to ensure the reproducibility of models is for researchers to make data and code available, including annotation guidelines. In addition to model performance, studies that create annotated corpora should report annotator agreement measures to assess the quality of the corpora. Few of the included studies made their data or code available (<xref ref-type="supplementary-material" rid="app3">Multimedia Appendices 3</xref>, <xref ref-type="supplementary-material" rid="app4">4</xref>, and <xref ref-type="supplementary-material" rid="app6">6</xref>).</p>
          <p>A particular difficulty when comparing different systems comes from a lack of a <italic>gold standard</italic> that can be used to compare the systems. Some studies created their own corpora, collecting data randomly or based on keywords relevant to their studies. Others reused data sets from prior studies or shared tasks. Although outside the scope of this review, there have been shared tasks that aim to advance research through competition, focusing on gender and age prediction. A longstanding shared task focused on author profiling was hosted at the PAN workshop of CLEF [<xref ref-type="bibr" rid="ref123">123</xref>-<xref ref-type="bibr" rid="ref129">129</xref>]. More recently, Social Media Mining for Health (SMM4H), 2022, included 2 tasks for age detection, releasing new annotated corpora for the tasks [<xref ref-type="bibr" rid="ref148">148</xref>]; several researchers reported using the corpora from these shared tasks. Testing and reporting performance metrics against these publicly available data sets, without alteration, would provide a comparable metric of different approaches. However, although reusing annotated corpora provides quick access to labeled data, it does have some limitations, including data loss over time as users delete their tweets, which not only reduces the size of the data but also can result in a data imbalance in the corpus.</p>
          <p>A summary of our recommendations to reduce some potential bias in the data and improve the classification, reproducibility, and validation of the ML methods used can be found in <xref rid="figure2" ref-type="fig">Figure 2</xref>.</p>
          <fig id="figure2" position="float">
            <label>Figure 2</label>
            <caption>
              <p>Summary of recommendations for best practices in the collection of training data, and the development and dissemination of age or gender machine learning prediction models.</p>
            </caption>
            <graphic xlink:href="jmir_v26i1e47923_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
        </sec>
        <sec>
          <title>Gender Prediction</title>
          <p>Almost all the included studies approached the gender prediction task as a binary classification task, identifying a user as either male or female. We note that even when focusing on binary gender classification, which is the prevalent approach, the task of gender prediction on Twitter could be better characterized as a multinomial classification task: given a user account, the classifier should return male, female, or <italic>nonpersonal</italic>. The last label (nonpersonal) can account for Twitter users representing organizations or bots. Although some studies attempted to identify and exclude nonpersonal accounts as a preprocessing step, other studies developed their systems using previously annotated data sets that were exclusively labeled as male or female users or removed nonpersonal accounts during annotation before training and testing. It is unknown how well these systems would perform when extended to unseen data that may contain nonpersonal accounts.</p>
          <p>Excluding nonpersonal accounts, the ratio of male users to female users in the training data set is also important, as it should mimic the natural distribution of Twitter users, estimated to be 31.5% female users and 68.5% male users as of January 2021 [<xref ref-type="bibr" rid="ref149">149</xref>]. However, some authors biased their collections using unconventional methods of collection or using artificially balanced data sets. The most conventional method to collect a set of Twitter accounts is to query for any tweet mentioning functional words without semantic meaning such as <italic>of</italic>, <italic>the</italic>, or <italic>and</italic> from the Twitter API. Whereas collecting Twitter users using functional or neutral keywords, a given language, or geographic areas resulted in a male:female ratio close to the ratio naturally observed on Twitter, other choices resulted in collections with different ratios. Such changes in ratios could have improved (or deteriorated) the training of the authors’ classifiers and biased their evaluations, which did not reflect the performance of their approach on a random sample of Twitter users.</p>
          <p>All studies treated gender as a binary determination of male or female. Although some referenced the limitation of this approach, they opted to use these designations given the need to align their data with outside resources, such as the US Census or Social Security Administration data. We note that gender, unlike biological sex, is not necessarily binary as it is a social construct and has been shown to influence a person’s use of health care, interactions, therapeutic responses, disease perceptions, and decision-making [<xref ref-type="bibr" rid="ref150">150</xref>]; this underscores the importance of expanding the efforts of classification beyond binary to improve accuracy and avoid misinterpreting results.</p>
        </sec>
        <sec>
          <title>Age Prediction</title>
          <p>The age prediction task generally had a lower performance than the gender prediction task. This was true for studies that developed their own models as well as those that used open-source or off-the-shelf software. This may be because most studies approached age prediction as a multiclass classification task. The proxies used, such as language, names, networks, or images, may have limited predictive value for age. In addition, the distribution of Twitter users means that any data set will be inherently imbalanced, providing few training examples for age groups at the tail end of the distribution. This data imbalance may lead to too few instances of the minority classes to effectively train the classifier. For classification models based on images, poor performance for age may be unsurprising given that it can be difficult for humans to discern age from a single image. In addition, photos may be subject to photo editing or enhancement or may not be a recent photograph of the user. Because of a lack of error analysis reports in the included studies, it is difficult to determine the source of the classification difficulty for age.</p>
          <p>Performance aside, the fact that the number and range of age groups vary across studies suggests that a classification approach is not generalizable to all research applications. Identifying the exact age, rather than age groups, can generalize to applications that do not align with predefined groupings of binary or multiclass models; however, using high-precision rules to extract self-reports of exact age from the user’s profile metadata has been shown not to scale. As we worked on this study, we noted that none of the reviewed systems opted for extracting the exact age. To test the feasibility and utility of a generalizable system that extracts the exact age from a tweet in a user’s timeline using deep learning methods, separate from this study, our group developed a classification and extraction pipeline using the RoBERTa-Large model and a rule-based extraction model [<xref ref-type="bibr" rid="ref151">151</xref>]. The system was trained and tested on 11,000 annotated tweets. The classification of tweets mentioning an age achieved an <italic>F</italic><sub>1</sub>-score of 0.93, and the extraction of age from these tweets achieved an <italic>F</italic><sub>1</sub>-score of 0.86. From a collection of 245,947 users, age was extracted for 54% using REPORTage. A shared task for the classification task ran at the SMM4H 2022 workshop, and we released the annotated data set. We did not include our approach in the scoping review, as there were no comparable systems published before the release of the exact age extraction approach as part of the SMM4H 2022 shared task.</p>
        </sec>
        <sec>
          <title>Potential Bias of Differing Methods</title>
          <p>The limitations of using names to distinguish between genders may promote bias, particularly if the names used for training do not represent the ethnic diversity of the population, and some cultures may have more unisex names than others, which cannot be used to distinguish genders. There can be a high degree of uncertainty for many users for whom gender cannot be classified by name; estimates by Sloan et al [<xref ref-type="bibr" rid="ref152">152</xref>] are that 52% of users will be unclassified using this method. However, studies have suggested that the classifications made may be relatively accurate given that the data from UK Twitter demonstrates a high level of agreement with the UK census data [<xref ref-type="bibr" rid="ref153">153</xref>]. Furthermore, when used alone, this heuristic may label some organization accounts, such as PAUL_BAKERY, as a person.</p>
          <p>Relying on self-declarations may be prone to bias as well. For example, younger people are more likely to profess their age than older adults, as age may be more important to them. With respect to gender pronouns, these may be more likely to be declared by those in some occupations or age groups. Indeed, there may also be other biases to self-declarations of data based on culture, background, social class, or country of origin or residence.</p>
          <p>Using users’ profile images for gender and age identification is challenging. Not all Twitter users provide a picture of themselves, with many opting for pictures of their pets, objects, children, scenery, or even celebrities. Identifying the gender and age of even those with pictures of themselves can be problematic if the quality of the pictures is poor, the pictures contain more than 1 face, or the pictures are not recent, particularly for predicting age. A comparison of systems using images to predict demographics [<xref ref-type="bibr" rid="ref154">154</xref>] measured not only the accuracy in identifying age and gender but also the percentage of images in which a face could be detected, finding that only approximately 30% of Twitter users had a single detectable face.</p>
          <p>Methods to filter out organizations in the studies included removing accounts with a large number of followers [<xref ref-type="bibr" rid="ref71">71</xref>] or explicitly searching for organizations by matching username terms linked to economic activities, such as restaurant and hotel [<xref ref-type="bibr" rid="ref49">49</xref>]. These methods remove accounts that do not represent a single user. However, they do not remove bots. Although one of the studies created a classifier to detect bots, in many studies the filtering of bots was either limited to those identified in manual annotation or by simple heuristics or was nonexistent (<xref ref-type="supplementary-material" rid="app5">Multimedia Appendix 5</xref>).</p>
        </sec>
        <sec>
          <title>Validation of Age and Gender Proxies</title>
          <p>For cases where age or gender are estimated, it is necessary to conduct validation exercises whereby the data are compared with a <italic>gold-standard data set</italic> to establish accuracy levels. For example, 1 study [<xref ref-type="bibr" rid="ref119">119</xref>] that used off-the-shelf software also created a manually annotated gold-standard data set for measuring accuracy. This study found that although the accuracy of crowdsourcing was higher than that of software, the accuracy was only approximately 60% for age. This puts into question the use of manual annotations alone as a gold standard.</p>
          <p>The most reliable way of generating a <italic>gold standard</italic> is to obtain the information directly from the user. This may be done in the form of direct correspondence with the user, such as messaging via social media or, the other way around, requesting Twitter handles in surveys that collect demographic data. Other methods for validation, such as manual extraction, may be less rigorous. However, these methods can be improved by using multiple independent annotators and experienced teams.</p>
          <p>External validation of the model is also a vital step to assess how the model will perform on unseen data [<xref ref-type="bibr" rid="ref155">155</xref>,<xref ref-type="bibr" rid="ref156">156</xref>]. In a validation on a second data set, Yang et al [<xref ref-type="bibr" rid="ref117">117</xref>] found that performance dropped in all but 2 of their models, stressing the importance of benchmarking existing systems on a targeted corpus. This step is equally important when using existing systems, so a range of expected performances can be reported and used in any analysis of the output.</p>
          <p>In addition to the potential biases reported earlier, predicting the age and gender of Twitter users has some potential limitations that should be considered and, when possible, addressed to limit their effects. As evidenced by the performance results of the included studies, determining the precise age or age group of Twitter users solely based on their Twitter profiles and tweet content can be challenging. Although methods to extract a user’s self-reported age can be executed with high precision [<xref ref-type="bibr" rid="ref151">151</xref>], predicting age, especially for more specific age groups, remains a complex task. Another limitation to consider is the potential for users to misrepresent their reported age or gender, which can introduce inaccuracies and affect the reliability of predictions based on user-supplied data. This phenomenon is not unique to Twitter and has been identified in other data sources such as surveys [<xref ref-type="bibr" rid="ref157">157</xref>,<xref ref-type="bibr" rid="ref158">158</xref>]. Many of the included studies used self-reported data to label their training data; therefore, any potential misrepresentations could be approached as a noisy label problem. There are numerous methods that can be used to manage the effect of label noise on classification models, such as distance learning or ensemble methods [<xref ref-type="bibr" rid="ref159">159</xref>,<xref ref-type="bibr" rid="ref160">160</xref>]. Furthermore, it is important to effectively address potential noise and uncertainty when using the output data for secondary analysis. Statistical techniques that can handle imprecise or uncertain data, such as Bayesian inference or fuzzy logic, can be valuable in this context. Using these methods, the analysis can better account for uncertain predictions, leading to more robust and reliable results. 
Finally, users’ age changes over time, and their profiles may not be updated accordingly, or the age tweet may be from an earlier year and not reflect their current age. Researchers should ensure that the users’ labeled age is contemporaneous with the other data included in the prediction model. Predicting the age and gender of Twitter users provides valuable insights, and most identified limitations presented by the data can be mitigated.</p>
        </sec>
      </sec>
      <sec>
        <title>Ethical Considerations</title>
        <p>Several studies have shown that social media users generally do not have concerns about their data being used for research or even have favorable opinions about it [<xref ref-type="bibr" rid="ref161">161</xref>,<xref ref-type="bibr" rid="ref162">162</xref>]. However, the ethical frameworks for the use of these data are still being developed [<xref ref-type="bibr" rid="ref163">163</xref>-<xref ref-type="bibr" rid="ref165">165</xref>], and institutional review boards may deem the use of publicly available data, such as those collected from Twitter, as exempt from human participant research; however, it is incumbent on the researcher to consult with their institutional review boards or equivalent ethical committees to obtain such exemptions [<xref ref-type="bibr" rid="ref165">165</xref>]. Although the data are publicly available, it is important to carefully consider potential ethical implications when predicting the age and gender of Twitter users. This process may raise privacy concerns, particularly when publishing data that may be considered sensitive, necessitating the protection of user identities and the anonymization of data to prevent reidentification [<xref ref-type="bibr" rid="ref166">166</xref>]. Anonymizing the data may include removing user identifiers, modifying the tweet text, or generating synthetic tweets [<xref ref-type="bibr" rid="ref165">165</xref>]. In addition, automated methods for predicting user age or gender have limitations and may result in misclassifications. Transparency regarding the limitations of the methods, algorithms, and data sources used in age and gender prediction is essential so that any use of these methods or data in secondary analysis can take such limitations into account. 
Although the prediction of age and gender may present some potential ethical concerns, it is important to recognize that there are also benefits to the use of these data for health research that can outweigh these concerns, such as eliciting insights into disease prevalence, patterns, and variations or distinguishing health behaviors and attitudes across different subgroups.</p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>It is unlikely that we have identified all studies using off-the-shelf software, as we did not search for specific named software, but part of our remit was to identify the array of software used. We did not limit our inclusion to only studies that developed their own software; therefore, we have included studies that used proprietary software. These software products do not publish their methodologies; therefore, we are unable to directly compare these approaches with others.</p>
        <p>We also included studies for which the prediction of age and gender was secondary to the primary focus of their study. These studies either used proprietary software, previously developed methods, or developed limited methods to predict demographic information. In general, these studies did not report the performance of their prediction methods on their data sets. Although some reported the original performance metrics of the methods used, it cannot be assumed that these methods will perform similarly across all data.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>The prediction of demographic data, such as age and gender, is an important step in increasing the value and application of social media data. Many methods have been reported in the literature with differing degrees of success. Although we sought to explore whether deep learning approaches would advance the performance for these tasks as they have been shown to do for other natural language processing tasks, many of the included studies used traditional ML methods. Although only explored by a handful of studies, deep learning methods appear to perform well for the prediction of a user’s gender or age. However, direct comparison of the published methods was impossible, as different test sets were used in the studies. This highlights the need for recently developed, publicly available gold-standard corpora, such as those released for shared tasks such as SMM4H or PAN-CLEF, to have unbiased data and baseline metrics to compare different approaches going forward.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>PRISMA-ScR (Preferred Reporting Items for Systematic Reviews and Meta-Analyses Extension for Scoping Reviews) checklist.</p>
        <media xlink:href="jmir_v26i1e47923_app1.docx" xlink:title="DOCX File, 30 KB"/>
      </supplementary-material>
      <supplementary-material id="app2">
        <label>Multimedia Appendix 2</label>
        <p>Search strategies and results for individual databases.</p>
        <media xlink:href="jmir_v26i1e47923_app2.docx" xlink:title="DOCX File, 30 KB"/>
      </supplementary-material>
      <supplementary-material id="app3">
        <label>Multimedia Appendix 3</label>
        <p>Extracted data from the included studies predicting gender.</p>
        <media xlink:href="jmir_v26i1e47923_app3.xlsx" xlink:title="XLSX File  (Microsoft Excel File), 44 KB"/>
      </supplementary-material>
      <supplementary-material id="app4">
        <label>Multimedia Appendix 4</label>
        <p>Extracted data from the included studies predicting age.</p>
        <media xlink:href="jmir_v26i1e47923_app4.xlsx" xlink:title="XLSX File  (Microsoft Excel File), 27 KB"/>
      </supplementary-material>
      <supplementary-material id="app5">
        <label>Multimedia Appendix 5</label>
        <p>Information on the identification and removal of nonpersonal or bot accounts from the data set. Features used for annotation or prediction of gender or age.</p>
        <media xlink:href="jmir_v26i1e47923_app5.xlsx" xlink:title="XLSX File  (Microsoft Excel File), 22 KB"/>
      </supplementary-material>
      <supplementary-material id="app6">
        <label>Multimedia Appendix 6</label>
        <p>Details of corpora created in the included studies and their reported availability.</p>
        <media xlink:href="jmir_v26i1e47923_app6.xlsx" xlink:title="XLSX File  (Microsoft Excel File), 21 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">API</term>
          <def>
            <p>application programming interface</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">CLEF</term>
          <def>
            <p>Conference and Labs of the Evaluation Forum</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">DNN</term>
          <def>
            <p>deep neural network</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">ML</term>
          <def>
            <p>machine learning</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">PICOS</term>
          <def>
            <p>Population, Intervention, Comparison, Outcomes, and Study Design</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">PRISMA-ScR</term>
          <def>
            <p>Preferred Reporting Items for Systematic Reviews and Meta-Analyses Extension for Scoping Reviews</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">SMM4H</term>
          <def>
            <p>Social Media Mining for Health</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">SVM</term>
          <def>
            <p>support vector machine</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>This work was supported by the National Institutes of Health (NIH) National Library of Medicine (NLM) under grant NIH-NLM R01LM011176. The NIH NLM funded this research but was not involved in the design or conduct of the study; collection, management, analysis, or interpretation of the data; preparation, review, or approval of the manuscript; or the decision to submit the manuscript for publication.</p>
    </ack>
    <notes>
      <sec>
        <title>Data Availability</title>
        <p>The included studies are available on the web. The search strategy and extracted data on included studies are available in <xref ref-type="supplementary-material" rid="app2">Multimedia Appendices 2</xref>-<xref ref-type="supplementary-material" rid="app6">6</xref>.</p>
      </sec>
    </notes>
    <fn-group>
      <fn fn-type="con">
        <p>SG, KO, and GGH devised the study and identified data for extraction. SG created and executed the search strategy and created the initial draft of the manuscript. SG and KO were responsible for study selection. All the authors were responsible for data extraction, summarization, and discussion. KO synthesized all data and created all tables. All the authors commented on and edited the manuscript. KO provided the final version of the manuscript. All the authors contributed to the final draft of the manuscript.</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="web">
          <article-title>Real-world evidence</article-title>
          <source>U.S. Food and Drug Administration</source>
          <access-date>2023-03-30</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.fda.gov/science-research/science-and-research-special-topics/real-world-evidence">https://www.fda.gov/science-research/science-and-research-special-topics/real-world-evidence</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Alessa</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Faezipour</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>A review of influenza detection and prediction through social networking sites</article-title>
          <source>Theor Biol Med Model</source>
          <year>2018</year>
          <month>02</month>
          <day>01</day>
          <volume>15</volume>
          <issue>1</issue>
          <fpage>2</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://tbiomed.biomedcentral.com/articles/10.1186/s12976-017-0074-5"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12976-017-0074-5</pub-id>
          <pub-id pub-id-type="medline">29386017</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12976-017-0074-5</pub-id>
          <pub-id pub-id-type="pmcid">PMC5793414</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bisanzio</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Kraemer</surname>
              <given-names>MU</given-names>
            </name>
            <name name-style="western">
              <surname>Bogoch</surname>
              <given-names>II</given-names>
            </name>
            <name name-style="western">
              <surname>Brewer</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Brownstein</surname>
              <given-names>JS</given-names>
            </name>
            <name name-style="western">
              <surname>Reithinger</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Use of Twitter social media activity as a proxy for human mobility to predict the spatiotemporal spread of COVID-19 at global scale</article-title>
          <source>Geospat Health</source>
          <year>2020</year>
          <month>06</month>
          <day>15</day>
          <volume>15</volume>
          <issue>1</issue>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.4081/gh.2020.882"/>
          </comment>
          <pub-id pub-id-type="doi">10.4081/gh.2020.882</pub-id>
          <pub-id pub-id-type="medline">32575957</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Magge</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Tutubalina</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Miftahutdinov</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Alimova</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Dirkson</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Verberne</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Weissenbacher</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Gonzalez-Hernandez</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>DeepADEMiner: a deep learning pharmacovigilance pipeline for extraction and normalization of adverse drug event mentions on Twitter</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2021</year>
          <month>09</month>
          <day>18</day>
          <volume>28</volume>
          <issue>10</issue>
          <fpage>2184</fpage>
          <lpage>92</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/34270701"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamia/ocab114</pub-id>
          <pub-id pub-id-type="medline">34270701</pub-id>
          <pub-id pub-id-type="pii">6322900</pub-id>
          <pub-id pub-id-type="pmcid">PMC8449608</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nikfarjam</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Sarker</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>O'Connor</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Ginn</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Gonzalez</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Pharmacovigilance from social media: mining adverse drug reaction mentions using sequence labeling with word embedding cluster features</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2015</year>
          <month>05</month>
          <volume>22</volume>
          <issue>3</issue>
          <fpage>671</fpage>
          <lpage>81</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/25755127"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamia/ocu041</pub-id>
          <pub-id pub-id-type="medline">25755127</pub-id>
          <pub-id pub-id-type="pii">ocu041</pub-id>
          <pub-id pub-id-type="pmcid">PMC4457113</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Guntuku</surname>
              <given-names>SC</given-names>
            </name>
            <name name-style="western">
              <surname>Sherman</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Stokes</surname>
              <given-names>DC</given-names>
            </name>
            <name name-style="western">
              <surname>Agarwal</surname>
              <given-names>AK</given-names>
            </name>
            <name name-style="western">
              <surname>Seltzer</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Merchant</surname>
              <given-names>RM</given-names>
            </name>
            <name name-style="western">
              <surname>Ungar</surname>
              <given-names>LH</given-names>
            </name>
          </person-group>
          <article-title>Tracking mental health and symptom mentions on Twitter during COVID-19</article-title>
          <source>J Gen Intern Med</source>
          <year>2020</year>
          <month>09</month>
          <volume>35</volume>
          <issue>9</issue>
          <fpage>2798</fpage>
          <lpage>800</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/32638321"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s11606-020-05988-8</pub-id>
          <pub-id pub-id-type="medline">32638321</pub-id>
          <pub-id pub-id-type="pii">10.1007/s11606-020-05988-8</pub-id>
          <pub-id pub-id-type="pmcid">PMC7340749</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ma</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Constructing a semantic graph with depression symptoms extraction from Twitter</article-title>
          <source>Proceedings of the IEEE Conference on Computational Intelligence in Bioinformatics and Computational Biology (CIBCB)</source>
          <conf-name>CIBCB 2019</conf-name>
          <conf-date>July 9-11, 2019</conf-date>
          <conf-loc>Siena, Italy</conf-loc>
          <pub-id pub-id-type="doi">10.1109/cibcb.2019.8791452</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bauermeister</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Zimmerman</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Johns</surname>
              <given-names>MM</given-names>
            </name>
            <name name-style="western">
              <surname>Glowacki</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Stoddard</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Volz</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Innovative recruitment using online networks: lessons learned from an online study of alcohol and other drug use utilizing a web-based, respondent-driven sampling (webRDS) strategy</article-title>
          <source>J Stud Alcohol Drugs</source>
          <year>2012</year>
          <month>09</month>
          <volume>73</volume>
          <issue>5</issue>
          <fpage>834</fpage>
          <lpage>8</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/22846248"/>
          </comment>
          <pub-id pub-id-type="doi">10.15288/jsad.2012.73.834</pub-id>
          <pub-id pub-id-type="medline">22846248</pub-id>
          <pub-id pub-id-type="pmcid">PMC3410951</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Weissenbacher</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Flores</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>O'Connor</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Rawal</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Stevens</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Gonzalez-Hernandez</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Automatic cohort determination from Twitter for HIV prevention amongst Black and Hispanic men</article-title>
          <source>AMIA Jt Summits Transl Sci Proc</source>
          <year>2022</year>
          <fpage>504</fpage>
          <lpage>13</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/35854738"/>
          </comment>
          <pub-id pub-id-type="medline">35854738</pub-id>
          <pub-id pub-id-type="pii">2315</pub-id>
          <pub-id pub-id-type="pmcid">PMC9285152</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="web">
          <article-title>Twitter API</article-title>
          <source>Twitter</source>
          <access-date>2023-03-30</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://developer.twitter.com/en/docs/twitter-api">https://developer.twitter.com/en/docs/twitter-api</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sinnenberg</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Buttenheim</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Padrez</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Mancheno</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Ungar</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Merchant</surname>
              <given-names>RM</given-names>
            </name>
          </person-group>
          <article-title>Twitter as a tool for health research: a systematic review</article-title>
          <source>Am J Public Health</source>
          <year>2017</year>
          <month>01</month>
          <volume>107</volume>
          <issue>1</issue>
          <fpage>e1</fpage>
          <lpage>8</lpage>
          <pub-id pub-id-type="doi">10.2105/AJPH.2016.303512</pub-id>
          <pub-id pub-id-type="medline">27854532</pub-id>
          <pub-id pub-id-type="pmcid">PMC5308155</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gustafson</surname>
              <given-names>DL</given-names>
            </name>
            <name name-style="western">
              <surname>Woodworth</surname>
              <given-names>CF</given-names>
            </name>
          </person-group>
          <article-title>Methodological and ethical issues in research using social media: a metamethod of Human Papillomavirus vaccine studies</article-title>
          <source>BMC Med Res Methodol</source>
          <year>2014</year>
          <month>12</month>
          <day>02</day>
          <volume>14</volume>
          <fpage>127</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedresmethodol.biomedcentral.com/articles/10.1186/1471-2288-14-127"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/1471-2288-14-127</pub-id>
          <pub-id pub-id-type="medline">25468265</pub-id>
          <pub-id pub-id-type="pii">1471-2288-14-127</pub-id>
          <pub-id pub-id-type="pmcid">PMC4265425</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Beck</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>McSweeney</surname>
              <given-names>JC</given-names>
            </name>
            <name name-style="western">
              <surname>Richards</surname>
              <given-names>KC</given-names>
            </name>
            <name name-style="western">
              <surname>Roberson</surname>
              <given-names>PK</given-names>
            </name>
            <name name-style="western">
              <surname>Tsai</surname>
              <given-names>PF</given-names>
            </name>
            <name name-style="western">
              <surname>Souder</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Challenges in tailored intervention research</article-title>
          <source>Nurs Outlook</source>
          <year>2010</year>
          <month>03</month>
          <volume>58</volume>
          <issue>2</issue>
          <fpage>104</fpage>
          <lpage>10</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/20362779"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.outlook.2009.10.004</pub-id>
          <pub-id pub-id-type="medline">20362779</pub-id>
          <pub-id pub-id-type="pii">S0029-6554(09)00194-8</pub-id>
          <pub-id pub-id-type="pmcid">PMC3136169</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rimer</surname>
              <given-names>BK</given-names>
            </name>
            <name name-style="western">
              <surname>Kreuter</surname>
              <given-names>MW</given-names>
            </name>
          </person-group>
          <article-title>Advancing tailored health communication: a persuasion and message effects perspective</article-title>
          <source>J Commun</source>
          <year>2006</year>
          <month>08</month>
          <volume>56</volume>
          <issue>s1</issue>
          <fpage>S184</fpage>
          <lpage>201</lpage>
          <pub-id pub-id-type="doi">10.1111/j.1460-2466.2006.00289.x</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hinds</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Joinson</surname>
              <given-names>AN</given-names>
            </name>
          </person-group>
          <article-title>What demographic attributes do our digital footprints reveal? A systematic review</article-title>
          <source>PLoS One</source>
          <year>2018</year>
          <month>11</month>
          <day>28</day>
          <volume>13</volume>
          <issue>11</issue>
          <fpage>e0207112</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pone.0207112"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0207112</pub-id>
          <pub-id pub-id-type="medline">30485305</pub-id>
          <pub-id pub-id-type="pii">PONE-D-17-29489</pub-id>
          <pub-id pub-id-type="pmcid">PMC6261568</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dredze</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>How social media will change public health</article-title>
          <source>IEEE Intell Syst</source>
          <year>2012</year>
          <month>07</month>
          <volume>27</volume>
          <issue>4</issue>
          <fpage>81</fpage>
          <lpage>4</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://ieeexplore.ieee.org/xpls/abs_all.jsp?arnumber=6285937"/>
          </comment>
          <pub-id pub-id-type="doi">10.1109/MIS.2012.76</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Golder</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Stevens</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>O'Connor</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>James</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Gonzalez-Hernandez</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Methods to establish race or ethnicity of Twitter users: scoping review</article-title>
          <source>J Med Internet Res</source>
          <year>2022</year>
          <month>04</month>
          <day>29</day>
          <volume>24</volume>
          <issue>4</issue>
          <fpage>e35788</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2022/4/e35788/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/35788</pub-id>
          <pub-id pub-id-type="medline">35486433</pub-id>
          <pub-id pub-id-type="pii">v24i4e35788</pub-id>
          <pub-id pub-id-type="pmcid">PMC9107046</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bour</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Ahne</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Schmitz</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Perchoux</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Dessenne</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Fagherazzi</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>The use of social media for health research purposes: scoping review</article-title>
          <source>J Med Internet Res</source>
          <year>2021</year>
          <month>05</month>
          <day>27</day>
          <volume>23</volume>
          <issue>5</issue>
          <fpage>e25736</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2021/5/e25736/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/25736</pub-id>
          <pub-id pub-id-type="medline">34042593</pub-id>
          <pub-id pub-id-type="pii">v23i5e25736</pub-id>
          <pub-id pub-id-type="pmcid">PMC8193478</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>van Oosterhout</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>de Boer</surname>
              <given-names>AR</given-names>
            </name>
            <name name-style="western">
              <surname>Maas</surname>
              <given-names>AH</given-names>
            </name>
            <name name-style="western">
              <surname>Rutten</surname>
              <given-names>FH</given-names>
            </name>
            <name name-style="western">
              <surname>Bots</surname>
              <given-names>ML</given-names>
            </name>
            <name name-style="western">
              <surname>Peters</surname>
              <given-names>SA</given-names>
            </name>
          </person-group>
          <article-title>Sex differences in symptom presentation in acute coronary syndromes: a systematic review and meta-analysis</article-title>
          <source>J Am Heart Assoc</source>
          <year>2020</year>
          <month>05</month>
          <day>05</day>
          <volume>9</volume>
          <issue>9</issue>
          <fpage>e014733</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.ahajournals.org/doi/10.1161/JAHA.119.014733?url_ver=Z39.88-2003&#38;rfr_id=ori:rid:crossref.org&#38;rfr_dat=cr_pub%20%200pubmed"/>
          </comment>
          <pub-id pub-id-type="doi">10.1161/JAHA.119.014733</pub-id>
          <pub-id pub-id-type="medline">32363989</pub-id>
          <pub-id pub-id-type="pmcid">PMC7428564</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Trevisan</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Noale</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Prinelli</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Maggi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Sojic</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Di Bari</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Molinaro</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Bastiani</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Giacomelli</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Galli</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Adorni</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Antonelli Incalzi</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Pedone</surname>
              <given-names>C</given-names>
            </name>
            <collab>EPICOVID19 Working Group</collab>
          </person-group>
          <article-title>Age-related changes in clinical presentation of Covid-19: the EPICOVID19 web-based survey</article-title>
          <source>Eur J Intern Med</source>
          <year>2021</year>
          <month>04</month>
          <volume>86</volume>
          <fpage>41</fpage>
          <lpage>7</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://air.unimi.it/handle/2434/811304"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.ejim.2021.01.028</pub-id>
          <pub-id pub-id-type="medline">33579579</pub-id>
          <pub-id pub-id-type="pii">S0953-6205(21)00034-0</pub-id>
          <pub-id pub-id-type="pmcid">PMC7846211</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Brady</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Nielsen</surname>
              <given-names>MW</given-names>
            </name>
            <name name-style="western">
              <surname>Andersen</surname>
              <given-names>JP</given-names>
            </name>
            <name name-style="western">
              <surname>Oertelt-Prigione</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Lack of consideration of sex and gender in COVID-19 clinical studies</article-title>
          <source>Nat Commun</source>
          <year>2021</year>
          <month>07</month>
          <day>06</day>
          <volume>12</volume>
          <issue>1</issue>
          <fpage>4015</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41467-021-24265-8"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41467-021-24265-8</pub-id>
          <pub-id pub-id-type="medline">34230477</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41467-021-24265-8</pub-id>
          <pub-id pub-id-type="pmcid">PMC8260641</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tannenbaum</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Ellis</surname>
              <given-names>RP</given-names>
            </name>
            <name name-style="western">
              <surname>Eyssel</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Zou</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Schiebinger</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Sex and gender analysis improves science and engineering</article-title>
          <source>Nature</source>
          <year>2019</year>
          <month>11</month>
          <volume>575</volume>
          <issue>7781</issue>
          <fpage>137</fpage>
          <lpage>46</lpage>
          <pub-id pub-id-type="doi">10.1038/s41586-019-1657-6</pub-id>
          <pub-id pub-id-type="medline">31695204</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41586-019-1657-6</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Amiri</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Mohammadzadeh-Naziri</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Abbasi</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Cheraghi</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Jalali-Farahani</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Momenan</surname>
              <given-names>AA</given-names>
            </name>
            <name name-style="western">
              <surname>Amouzegar</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Hadaegh</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Azizi</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Smoking habits and incidence of cardiovascular diseases in men and women: findings of a 12 year follow up among an urban Eastern-Mediterranean population</article-title>
          <source>BMC Public Health</source>
          <year>2019</year>
          <month>08</month>
          <day>05</day>
          <volume>19</volume>
          <issue>1</issue>
          <fpage>1042</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcpublichealth.biomedcentral.com/articles/10.1186/s12889-019-7390-0"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12889-019-7390-0</pub-id>
          <pub-id pub-id-type="medline">31382950</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12889-019-7390-0</pub-id>
          <pub-id pub-id-type="pmcid">PMC6683328</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rosselli</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Ermini</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Tosi</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Boddi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Stefani</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Toncelli</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Modesti</surname>
              <given-names>PA</given-names>
            </name>
          </person-group>
          <article-title>Gender differences in barriers to physical activity among adolescents</article-title>
          <source>Nutr Metab Cardiovasc Dis</source>
          <year>2020</year>
          <month>08</month>
          <day>28</day>
          <volume>30</volume>
          <issue>9</issue>
          <fpage>1582</fpage>
          <lpage>9</lpage>
          <pub-id pub-id-type="doi">10.1016/j.numecd.2020.05.005</pub-id>
          <pub-id pub-id-type="medline">32605880</pub-id>
          <pub-id pub-id-type="pii">S0939-4753(20)30161-7</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>SL</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>WL</given-names>
            </name>
            <name name-style="western">
              <surname>Liang</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Liao</surname>
              <given-names>IC</given-names>
            </name>
          </person-group>
          <article-title>Factors associated with gender differences in medication adherence: a longitudinal study</article-title>
          <source>J Adv Nurs</source>
          <year>2014</year>
          <month>09</month>
          <volume>70</volume>
          <issue>9</issue>
          <fpage>2031</fpage>
          <lpage>40</lpage>
          <pub-id pub-id-type="doi">10.1111/jan.12361</pub-id>
          <pub-id pub-id-type="medline">24506542</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Krueger</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Botermann</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Schorr</surname>
              <given-names>SG</given-names>
            </name>
            <name name-style="western">
              <surname>Griese-Mammen</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Laufs</surname>
              <given-names>U</given-names>
            </name>
            <name name-style="western">
              <surname>Schulz</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Age-related medication adherence in patients with chronic heart failure: a systematic literature review</article-title>
          <source>Int J Cardiol</source>
          <year>2015</year>
          <month>04</month>
          <day>01</day>
          <volume>184</volume>
          <fpage>728</fpage>
          <lpage>35</lpage>
          <pub-id pub-id-type="doi">10.1016/j.ijcard.2015.03.042</pub-id>
          <pub-id pub-id-type="medline">25795085</pub-id>
          <pub-id pub-id-type="pii">S0167-5273(15)00280-6</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Auxier</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Anderson</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Social media use in 2021</article-title>
          <source>Pew Research Center</source>
          <year>2021</year>
          <month>4</month>
          <day>7</day>
          <access-date>2021-12-09</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.pewresearch.org/internet/2021/04/07/social-media-use-in-2021/">https://www.pewresearch.org/internet/2021/04/07/social-media-use-in-2021/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="web">
          <article-title>World population prospects - population division - United Nations</article-title>
          <source>United Nations Department of Economic and Social Affairs Population Division</source>
          <access-date>2023-07-28</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://population.un.org/wpp/">https://population.un.org/wpp/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mislove</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Lehmann</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Ahn</surname>
              <given-names>YY</given-names>
            </name>
            <name name-style="western">
              <surname>Onnela</surname>
              <given-names>JP</given-names>
            </name>
            <name name-style="western">
              <surname>Rosenquist</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Understanding the demographics of Twitter users</article-title>
          <source>Proc Int AAAI Conf Web Soc Media</source>
          <year>2021</year>
          <month>08</month>
          <day>03</day>
          <volume>5</volume>
          <issue>1</issue>
          <fpage>554</fpage>
          <lpage>7</lpage>
          <pub-id pub-id-type="doi">10.1609/icwsm.v5i1.14168</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fink</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Kopecky</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Morawski</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Inferring gender from the content of tweets: a region specific example</article-title>
          <source>Proc Int AAAI Conf Web Soc Media</source>
          <year>2021</year>
          <month>08</month>
          <day>03</day>
          <volume>6</volume>
          <issue>1</issue>
          <fpage>459</fpage>
          <lpage>62</lpage>
          <pub-id pub-id-type="doi">10.1609/icwsm.v6i1.14320</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Alowibdi</surname>
              <given-names>JS</given-names>
            </name>
            <name name-style="western">
              <surname>Buy</surname>
              <given-names>UA</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Language independent gender classification on Twitter</article-title>
          <source>Proceedings of the 2013 IEEE/ACM International Conference on Advances in Social Networks Analysis and Mining</source>
          <conf-name>ASONAM '13</conf-name>
          <conf-date>August 25-28, 2013</conf-date>
          <conf-loc>Niagara Falls, ON</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Qian</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>You</surname>
              <given-names>Z</given-names>
            </name>
          </person-group>
          <article-title>Learning user embedding representation for gender prediction</article-title>
          <source>Proceedings of the IEEE 28th International Conference on Tools with Artificial Intelligence (ICTAI)</source>
          <conf-name>ICTAI 2016</conf-name>
          <conf-date>November 6-8, 2016</conf-date>
          <conf-loc>San Jose, CA</conf-loc>
          <pub-id pub-id-type="doi">10.1109/ictai.2016.0048</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Culotta</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Ravi</surname>
              <given-names>NK</given-names>
            </name>
            <name name-style="western">
              <surname>Cutler</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Predicting the demographics of Twitter users from website traffic data</article-title>
          <source>Proceedings of the Twenty-Ninth AAAI Conference on Artificial Intelligence</source>
          <conf-name>AAAI'15</conf-name>
          <conf-date>January 25-30, 2015</conf-date>
          <conf-loc>Austin, TX</conf-loc>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dl.acm.org/doi/abs/10.5555/2887007.2887018"/>
          </comment>
          <pub-id pub-id-type="doi">10.1609/aaai.v29i1.9204</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sloan</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Morgan</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Burnap</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Williams</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Who tweets? Deriving the demographic characteristics of age, occupation and social class from Twitter user meta-data</article-title>
          <source>PLoS One</source>
          <year>2015</year>
          <volume>10</volume>
          <issue>3</issue>
          <fpage>e0115545</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pone.0115545"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0115545</pub-id>
          <pub-id pub-id-type="medline">25729900</pub-id>
          <pub-id pub-id-type="pii">PONE-D-14-36461</pub-id>
          <pub-id pub-id-type="pmcid">PMC4346393</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Oktay</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Firat</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Ertem</surname>
              <given-names>Z</given-names>
            </name>
          </person-group>
          <article-title>Demographic breakdown of Twitter users: an analysis based on names</article-title>
          <source>Proceedings of the Academy of Science and Engineering (ASE)</source>
          <conf-name>ASE'14</conf-name>
          <conf-date>September 15-19, 2014</conf-date>
          <conf-loc>Västerås, Sweden</conf-loc>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.researchgate.net/publication/315538705_Demographic_Breakdown_of_Twitter_Users_An_analysis_based_on_names"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nguyen</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Gravel</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Trieschnigg</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Meder</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>"How old do you think I am?" A study of language and age in Twitter</article-title>
          <source>Proc Int AAAI Conf Web Soc Media</source>
          <year>2021</year>
          <month>08</month>
          <day>03</day>
          <volume>7</volume>
          <issue>1</issue>
          <fpage>439</fpage>
          <lpage>48</lpage>
          <pub-id pub-id-type="doi">10.1609/icwsm.v7i1.14381</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mikolov</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Corrado</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Dean</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Efficient estimation of word representations in vector space</article-title>
          <source>arXiv. Preprint posted online January 16, 2013</source>
          <pub-id pub-id-type="doi">10.48550/arXiv.1301.3781</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mikolov</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Sutskever</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Corrado</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Dean</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Distributed representations of words and phrases and their compositionality</article-title>
          <source>Proceedings of the 26th International Conference on Neural Information Processing Systems - Volume 2</source>
          <conf-name>NIPS'13</conf-name>
          <conf-date>December 5-10, 2013</conf-date>
          <conf-loc>Lake Tahoe, Nevada</conf-loc>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dl.acm.org/doi/proceedings/10.5555/2999792"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tricco</surname>
              <given-names>AC</given-names>
            </name>
            <name name-style="western">
              <surname>Lillie</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Zarin</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>O'Brien</surname>
              <given-names>KK</given-names>
            </name>
            <name name-style="western">
              <surname>Colquhoun</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Levac</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Moher</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Peters</surname>
              <given-names>MD</given-names>
            </name>
            <name name-style="western">
              <surname>Horsley</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Weeks</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Hempel</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Akl</surname>
              <given-names>EA</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>McGowan</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Stewart</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Hartling</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Aldcroft</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Wilson</surname>
              <given-names>MG</given-names>
            </name>
            <name name-style="western">
              <surname>Garritty</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Lewin</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Godfrey</surname>
              <given-names>CM</given-names>
            </name>
            <name name-style="western">
              <surname>Macdonald</surname>
              <given-names>MT</given-names>
            </name>
            <name name-style="western">
              <surname>Langlois</surname>
              <given-names>EV</given-names>
            </name>
            <name name-style="western">
              <surname>Soares-Weiser</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Moriarty</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Clifford</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Tunçalp</surname>
              <given-names>Özge</given-names>
            </name>
            <name name-style="western">
              <surname>Straus</surname>
              <given-names>SE</given-names>
            </name>
          </person-group>
          <article-title>PRISMA extension for Scoping Reviews (PRISMA-ScR): checklist and explanation</article-title>
          <source>Ann Intern Med</source>
          <year>2018</year>
          <month>10</month>
          <day>02</day>
          <volume>169</volume>
          <issue>7</issue>
          <fpage>467</fpage>
          <lpage>73</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.acpjournals.org/doi/abs/10.7326/M18-0850?url_ver=Z39.88-2003&#38;rfr_id=ori:rid:crossref.org&#38;rfr_dat=cr_pub%20%200pubmed"/>
          </comment>
          <pub-id pub-id-type="doi">10.7326/M18-0850</pub-id>
          <pub-id pub-id-type="medline">30178033</pub-id>
          <pub-id pub-id-type="pii">2700389</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Umar</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Bashir</surname>
              <given-names>SA</given-names>
            </name>
            <name name-style="western">
              <surname>Abdullahi</surname>
              <given-names>MB</given-names>
            </name>
            <name name-style="western">
              <surname>Adebayo</surname>
              <given-names>OS</given-names>
            </name>
          </person-group>
          <article-title>Comparative study of various machine learning algorithms for tweet classification</article-title>
          <source>i-Manager J Comput Sci</source>
          <year>2019</year>
          <volume>6</volume>
          <issue>4</issue>
          <fpage>12</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://repository.futminna.edu.ng:8080/jspui/handle/123456789/7229"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Amir-Behghadami</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Janati</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Population, Intervention, Comparison, Outcomes and Study (PICOS) design as a framework to formulate eligibility criteria in systematic reviews</article-title>
          <source>Emerg Med J</source>
          <year>2020</year>
          <month>06</month>
          <volume>37</volume>
          <issue>6</issue>
          <fpage>387</fpage>
          <pub-id pub-id-type="doi">10.1136/emermed-2020-209567</pub-id>
          <pub-id pub-id-type="medline">32253195</pub-id>
          <pub-id pub-id-type="pii">emermed-2020-209567</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bramer</surname>
              <given-names>WM</given-names>
            </name>
            <name name-style="western">
              <surname>Giustini</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Kramer</surname>
              <given-names>BM</given-names>
            </name>
          </person-group>
          <article-title>Comparing the coverage, recall, and precision of searches for 120 systematic reviews in Embase, MEDLINE, and Google Scholar: a prospective study</article-title>
          <source>Syst Rev</source>
          <year>2016</year>
          <month>03</month>
          <day>01</day>
          <volume>5</volume>
          <fpage>39</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://systematicreviewsjournal.biomedcentral.com/articles/10.1186/s13643-016-0215-7"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s13643-016-0215-7</pub-id>
          <pub-id pub-id-type="medline">26932789</pub-id>
          <pub-id pub-id-type="pii">10.1186/s13643-016-0215-7</pub-id>
          <pub-id pub-id-type="pmcid">PMC4772334</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kugley</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Wade</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Thomas</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Mahood</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Jørgensen</surname>
              <given-names>A-M</given-names>
            </name>
            <name name-style="western">
              <surname>Hammerstrøm</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Sathe</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>Searching for studies: a guide to information retrieval for Campbell systematic reviews</article-title>
          <source>Campbell Syst Rev</source>
          <year>2017</year>
          <month>02</month>
          <day>13</day>
          <volume>13</volume>
          <issue>1</issue>
          <fpage>1</fpage>
          <lpage>73</lpage>
          <pub-id pub-id-type="doi">10.4073/cmg.2016.1</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref44">
        <label>44</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lefebvre</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Glanville</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Briscoe</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Littlewood</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Marshall</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Metzendorf</surname>
              <given-names>MI</given-names>
            </name>
            <name name-style="western">
              <surname>Noel-Storr</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Paynter</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Rader</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Thomas</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wieland</surname>
              <given-names>LS</given-names>
            </name>
          </person-group>
          <person-group person-group-type="editor">
            <name name-style="western">
              <surname>Higgins</surname>
              <given-names>JP</given-names>
            </name>
            <name name-style="western">
              <surname>Thomas</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chandler</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Cumpston</surname>
              <given-names>MS</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Page</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Welch</surname>
              <given-names>VA</given-names>
            </name>
          </person-group>
          <article-title>Technical Supplement to Chapter 4: searching for and selecting studies</article-title>
          <source>Cochrane Handbook for Systematic Reviews of Interventions Version 6.3</source>
          <year>2022</year>
          <publisher-loc>London, UK</publisher-loc>
          <publisher-name>The Cochrane Collaboration</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref45">
        <label>45</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Booth</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>How much searching is enough? Comprehensive versus optimal retrieval for technology assessments</article-title>
          <source>Int J Technol Assess Health Care</source>
          <year>2010</year>
          <month>10</month>
          <volume>26</volume>
          <issue>4</issue>
          <fpage>431</fpage>
          <lpage>5</lpage>
          <pub-id pub-id-type="doi">10.1017/S0266462310000966</pub-id>
          <pub-id pub-id-type="medline">20923586</pub-id>
          <pub-id pub-id-type="pii">S0266462310000966</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref46">
        <label>46</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Briscoe</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Nunns</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Shaw</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>How do Cochrane authors conduct web searching to identify studies? Findings from a cross-sectional sample of Cochrane Reviews</article-title>
          <source>Health Info Libr J</source>
          <year>2020</year>
          <month>12</month>
          <volume>37</volume>
          <issue>4</issue>
          <fpage>293</fpage>
          <lpage>318</lpage>
          <pub-id pub-id-type="doi">10.1111/hir.12313</pub-id>
          <pub-id pub-id-type="medline">32511888</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref47">
        <label>47</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Stansfield</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Dickson</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Bangpan</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Exploring issues in the conduct of website searching and other online sources for systematic reviews: how can we be systematic?</article-title>
          <source>Syst Rev</source>
          <year>2016</year>
          <month>11</month>
          <day>15</day>
          <volume>5</volume>
          <issue>1</issue>
          <fpage>191</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://systematicreviewsjournal.biomedcentral.com/articles/10.1186/s13643-016-0371-9"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s13643-016-0371-9</pub-id>
          <pub-id pub-id-type="medline">27846867</pub-id>
          <pub-id pub-id-type="pii">10.1186/s13643-016-0371-9</pub-id>
          <pub-id pub-id-type="pmcid">PMC5111285</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref48">
        <label>48</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Godin</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Stapleton</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Kirkpatrick</surname>
              <given-names>SI</given-names>
            </name>
            <name name-style="western">
              <surname>Hanning</surname>
              <given-names>RM</given-names>
            </name>
            <name name-style="western">
              <surname>Leatherdale</surname>
              <given-names>ST</given-names>
            </name>
          </person-group>
          <article-title>Applying systematic review search methods to the grey literature: a case study examining guidelines for school-based breakfast programs in Canada</article-title>
          <source>Syst Rev</source>
          <year>2015</year>
          <month>10</month>
          <day>22</day>
          <volume>4</volume>
          <fpage>138</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://systematicreviewsjournal.biomedcentral.com/articles/10.1186/s13643-015-0125-0"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s13643-015-0125-0</pub-id>
          <pub-id pub-id-type="medline">26494010</pub-id>
          <pub-id pub-id-type="pii">10.1186/s13643-015-0125-0</pub-id>
          <pub-id pub-id-type="pmcid">PMC4619264</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref49">
        <label>49</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Alessandra</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Gentile</surname>
              <given-names>MM</given-names>
            </name>
            <name name-style="western">
              <surname>Bianco</surname>
              <given-names>DM</given-names>
            </name>
          </person-group>
          <person-group person-group-type="editor">
            <name name-style="western">
              <surname>Petrucci</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Racioppi</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Verde</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Who tweets in Italian? Demographic characteristics of twitter users</article-title>
          <source>New Statistical Developments in Data Science</source>
          <year>2019</year>
          <month>08</month>
          <day>21</day>
          <publisher-loc>Cham, Switzerland</publisher-loc>
          <publisher-name>Springer</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref50">
        <label>50</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Alfayez</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Awwad</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Kerr</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Alrashed</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Williams</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Al-Wabil</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <person-group person-group-type="editor">
            <name name-style="western">
              <surname>Meiselwitz</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Understanding gendered spaces using social media data</article-title>
          <source>Social Computing and Social Media. Applications and Analytics</source>
          <year>2017</year>
          <publisher-loc>Cham, Switzerland</publisher-loc>
          <publisher-name>Springer</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref51">
        <label>51</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Arafat</surname>
              <given-names>TA</given-names>
            </name>
            <name name-style="western">
              <surname>Budi</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Mahendra</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Salehah</surname>
              <given-names>DA</given-names>
            </name>
          </person-group>
          <article-title>Demographic analysis of candidates supporter in Twitter during Indonesian presidential election 2019</article-title>
          <source>Proceedings of the International Conference on ICT for Smart Society (ICISS)</source>
          <conf-name>ICISS 2020</conf-name>
          <conf-date>November 19-20, 2020</conf-date>
          <conf-loc>Bandung, Indonesia</conf-loc>
          <pub-id pub-id-type="doi">10.1109/iciss50791.2020.9307598</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref52">
        <label>52</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ardehaly</surname>
              <given-names>EM</given-names>
            </name>
            <name name-style="western">
              <surname>Culotta</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Co-training for demographic classification using deep learning from label proportions</article-title>
          <source>Proceedings of the IEEE International Conference on Data Mining Workshops (ICDMW)</source>
          <conf-name>ICDMW 2017</conf-name>
          <conf-date>November 18-21, 2017</conf-date>
          <conf-loc>New Orleans, LA</conf-loc>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://ieeexplore.ieee.org/document/8215778"/>
          </comment>
          <pub-id pub-id-type="doi">10.1109/icdmw.2017.144</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref53">
        <label>53</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ardehaly</surname>
              <given-names>EM</given-names>
            </name>
            <name name-style="western">
              <surname>Culotta</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Learning from noisy label proportions for classifying online social data</article-title>
          <source>Soc Netw Anal Mining</source>
          <year>2017</year>
          <month>11</month>
          <day>27</day>
          <volume>8</volume>
          <issue>1</issue>
          <pub-id pub-id-type="doi">10.1007/s13278-017-0478-6</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref54">
        <label>54</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Baxevanakis</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Gavras</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Mouratidis</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Kermanidis</surname>
              <given-names>KL</given-names>
            </name>
          </person-group>
          <article-title>A machine learning approach for gender identification of Greek tweet authors</article-title>
          <source>Proceedings of the 13th ACM International Conference on PErvasive Technologies Related to Assistive Environments</source>
          <conf-name>PETRA '20</conf-name>
          <conf-date>June 30-July 3, 2020</conf-date>
          <conf-loc>Corfu, Greece</conf-loc>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dl.acm.org/doi/10.1145/3389189.3397992"/>
          </comment>
          <pub-id pub-id-type="doi">10.1145/3389189.3397992</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref55">
        <label>55</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bayot</surname>
              <given-names>RK</given-names>
            </name>
            <name name-style="western">
              <surname>Gonçalves</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Age and gender classification of Tweets using convolutional neural networks</article-title>
          <source>Proceedings of the Third International Conference, MOD 2017</source>
          <conf-name>MOD 2017</conf-name>
          <conf-date>September 14-17, 2017</conf-date>
          <conf-loc>Volterra, Italy</conf-loc>
          <pub-id pub-id-type="doi">10.1007/978-3-319-72926-8_28</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref56">
        <label>56</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Brandt</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Buckingham</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Buntain</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Anderson</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Ray</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Pool</surname>
              <given-names>JR</given-names>
            </name>
            <name name-style="western">
              <surname>Ferrari</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>Identifying social media user demographics and topic diversity with computational social science: a case study of a major international policy forum</article-title>
          <source>J Comput Soc Sci</source>
          <year>2020</year>
          <month>01</month>
          <day>07</day>
          <volume>3</volume>
          <fpage>167</fpage>
          <lpage>88</lpage>
          <pub-id pub-id-type="doi">10.1007/s42001-019-00061-9</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref57">
        <label>57</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bsir</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Zrigui</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Enhancing deep learning gender identification with gated recurrent units architecture in social text</article-title>
          <source>Computacion y Sistemas</source>
          <year>2018</year>
          <month>09</month>
          <day>30</day>
          <volume>22</volume>
          <issue>3</issue>
          <fpage>757</fpage>
          <lpage>66</lpage>
          <pub-id pub-id-type="doi">10.13053/CyS-22-3-3036</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref58">
        <label>58</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bsir</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Zrigui</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Document model with attention bidirectional recurrent network for gender identification</article-title>
          <source>Proceedings of the 15th International Work-Conference on Artificial Neural Networks</source>
          <conf-name>IWANN 2019</conf-name>
          <conf-date>June 12-14, 2019</conf-date>
          <conf-loc>Gran Canaria, Spain</conf-loc>
          <pub-id pub-id-type="doi">10.1007/978-3-030-20521-8_51</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref59">
        <label>59</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cavazos-Rehg</surname>
              <given-names>PA</given-names>
            </name>
            <name name-style="western">
              <surname>Zewdie</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Krauss</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Sowles</surname>
              <given-names>SJ</given-names>
            </name>
          </person-group>
          <article-title>"No high like a brownie high": a content analysis of edible marijuana tweets</article-title>
          <source>Am J Health Promot</source>
          <year>2018</year>
          <month>05</month>
          <volume>32</volume>
          <issue>4</issue>
          <fpage>880</fpage>
          <lpage>6</lpage>
          <pub-id pub-id-type="doi">10.1177/0890117116686574</pub-id>
          <pub-id pub-id-type="medline">29214836</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref60">
        <label>60</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cavazos-Rehg</surname>
              <given-names>PA</given-names>
            </name>
            <name name-style="western">
              <surname>Krauss</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Costello</surname>
              <given-names>SJ</given-names>
            </name>
            <name name-style="western">
              <surname>Kaiser</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Cahn</surname>
              <given-names>ES</given-names>
            </name>
            <name name-style="western">
              <surname>Fitzsimmons-Craft</surname>
              <given-names>EE</given-names>
            </name>
            <name name-style="western">
              <surname>Wilfley</surname>
              <given-names>DE</given-names>
            </name>
          </person-group>
          <article-title>"I just want to be skinny.": a content analysis of tweets expressing eating disorder symptoms</article-title>
          <source>PLoS One</source>
          <year>2019</year>
          <month>01</month>
          <day>16</day>
          <volume>14</volume>
          <issue>1</issue>
          <fpage>e0207506</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pone.0207506"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0207506</pub-id>
          <pub-id pub-id-type="medline">30650072</pub-id>
          <pub-id pub-id-type="pii">PONE-D-17-15569</pub-id>
          <pub-id pub-id-type="pmcid">PMC6334988</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref61">
        <label>61</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cesare</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Dwivedi</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Nguyen</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Nsoesie</surname>
              <given-names>EO</given-names>
            </name>
          </person-group>
          <article-title>Use of social media, search queries, and demographic data to assess obesity prevalence in the United States</article-title>
          <source>Palgrave Commun</source>
          <year>2019</year>
          <month>09</month>
          <day>17</day>
          <volume>5</volume>
          <issue>1</issue>
          <fpage>106</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1057/s41599-019-0314-x"/>
          </comment>
          <pub-id pub-id-type="doi">10.1057/s41599-019-0314-x</pub-id>
          <pub-id pub-id-type="medline">32661492</pub-id>
          <pub-id pub-id-type="pii">106</pub-id>
          <pub-id pub-id-type="pmcid">PMC7357895</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref62">
        <label>62</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cesare</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Nguyen</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Grant</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Nsoesie</surname>
              <given-names>EO</given-names>
            </name>
          </person-group>
          <article-title>Social media captures demographic and regional physical activity</article-title>
          <source>BMJ Open Sport Exerc Med</source>
          <year>2019</year>
          <month>07</month>
          <day>14</day>
          <volume>5</volume>
          <issue>1</issue>
          <fpage>e000567</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/31423323"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/bmjsem-2019-000567</pub-id>
          <pub-id pub-id-type="medline">31423323</pub-id>
          <pub-id pub-id-type="pii">bmjsem-2019-000567</pub-id>
          <pub-id pub-id-type="pmcid">PMC6678033</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref63">
        <label>63</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chakraborty</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Messias</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Benevenuto</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Ghosh</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Ganguly</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Gummadi</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Who makes trends? Understanding demographic biases in crowdsourced recommendations</article-title>
          <source>Proceedings of the International AAAI Conference on Web and Social Media</source>
          <conf-name>ICWSM-17</conf-name>
          <conf-date>May 15-18, 2017</conf-date>
          <conf-loc>Montreal, Quebec</conf-loc>
          <pub-id pub-id-type="doi">10.1609/icwsm.v11i1.14894</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref64">
        <label>64</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chamberlain</surname>
              <given-names>BP</given-names>
            </name>
            <name name-style="western">
              <surname>Humby</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Deisenroth</surname>
              <given-names>MP</given-names>
            </name>
          </person-group>
          <article-title>Probabilistic inference of Twitter users’ age based on what they follow</article-title>
          <source>Proceedings of the Joint European Conference on Machine Learning and Knowledge Discovery in Databases (ECML PKDD 2017)</source>
          <conf-name>ECML PKDD 2017</conf-name>
          <conf-date>September 18-22, 2017</conf-date>
          <conf-loc>Skopje, Macedonia</conf-loc>
          <pub-id pub-id-type="doi">10.1007/978-3-319-71273-4_16</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref65">
        <label>65</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cheng</surname>
              <given-names>JK</given-names>
            </name>
            <name name-style="western">
              <surname>Fernandez</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Quindoza</surname>
              <given-names>RG</given-names>
            </name>
            <name name-style="western">
              <surname>Tan</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Cheng</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>A model for age and gender profiling of social media accounts based on post contents</article-title>
          <source>Neural Information Processing</source>
          <year>2018</year>
          <publisher-loc>New York City, NY</publisher-loc>
          <publisher-name>Springer International Publishing</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref66">
        <label>66</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cornelisse</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Pillai</surname>
              <given-names>RG</given-names>
            </name>
          </person-group>
          <article-title>Age inference on Twitter using SAGE and TF-IGM</article-title>
          <source>Proceedings of the 4th International Conference on Natural Language Processing and Information Retrieval</source>
          <year>2020</year>
          <month>12</month>
          <conf-name>NLPIR 2020</conf-name>
          <conf-date>December 18-20, 2020</conf-date>
          <conf-loc>Seoul, Republic of Korea</conf-loc>
          <pub-id pub-id-type="doi">10.1145/3443279.3443300</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref67">
        <label>67</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Duong</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Luo</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Pham</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>The ivory tower lost: how college students respond differently than the general public to the COVID-19 pandemic</article-title>
          <source>Proceedings of the IEEE/ACM International Conference on Advances in Social Networks Analysis and Mining (ASONAM)</source>
          <conf-name>ASONAM 2020</conf-name>
          <conf-date>December 7-10, 2020</conf-date>
          <conf-loc>The Hague, The Netherlands</conf-loc>
          <pub-id pub-id-type="doi">10.1109/asonam49781.2020.9381379</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref68">
        <label>68</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>ElSayed</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Farouk</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Gender identification for Egyptian Arabic dialect in twitter using deep learning models</article-title>
          <source>Egypt Inform J</source>
          <year>2020</year>
          <month>09</month>
          <volume>21</volume>
          <issue>3</issue>
          <fpage>159</fpage>
          <lpage>67</lpage>
          <pub-id pub-id-type="doi">10.1016/j.eij.2020.04.001</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref69">
        <label>69</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Emmery</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Chrupała</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Daelemans</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>Simple queries as distant labels for predicting gender on Twitter</article-title>
          <source>Proceedings of the 3rd Workshop on Noisy User-generated Text</source>
          <conf-name>NUT@EMNLP 2017</conf-name>
          <conf-date>September 7, 2017</conf-date>
          <conf-loc>Copenhagen, Denmark</conf-loc>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://github.com/facebookresearch/"/>
          </comment>
          <pub-id pub-id-type="doi">10.18653/v1/w17-4407</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref70">
        <label>70</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Garcia-Guzman</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Andrade-Ambriz</surname>
              <given-names>YA</given-names>
            </name>
            <name name-style="western">
              <surname>Ibarra-Manzano</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Ledesma</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Gomez</surname>
              <given-names>JC</given-names>
            </name>
            <name name-style="western">
              <surname>Almanza-Ojeda</surname>
              <given-names>DL</given-names>
            </name>
          </person-group>
          <article-title>Trend-based categories recommendations and age-gender prediction for Pinterest and Twitter users</article-title>
          <source>Appl Sci</source>
          <year>2020</year>
          <month>08</month>
          <day>28</day>
          <volume>10</volume>
          <issue>17</issue>
          <fpage>5957</fpage>
          <pub-id pub-id-type="doi">10.3390/app10175957</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref71">
        <label>71</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Geng</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Wei</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Feng</surname>
              <given-names>X</given-names>
            </name>
          </person-group>
          <article-title>Soft biometrics in online social networks: a case study on Twitter user gender recognition</article-title>
          <source>Proceedings of the IEEE Winter Applications of Computer Vision Workshops (WACVW)</source>
          <conf-name>WACVW 2017</conf-name>
          <conf-date>March 24-31, 2017</conf-date>
          <conf-loc>Santa Rosa, CA</conf-loc>
          <pub-id pub-id-type="doi">10.1109/wacvw.2017.8</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref72">
        <label>72</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Giannakopoulos</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Kalatzis</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Roussaki</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Papavassiliou</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Gender recognition based on social networks for multimedia production</article-title>
          <source>Proceedings of the IEEE 13th Image, Video, and Multidimensional Signal Processing Workshop (IVMSP)</source>
          <conf-name>IVMSP 2018</conf-name>
          <conf-date>June 10-12, 2018</conf-date>
          <conf-loc>Aristi Village, Greece</conf-loc>
          <pub-id pub-id-type="doi">10.1109/ivmspw.2018.8448788</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref73">
        <label>73</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Guimaraes</surname>
              <given-names>RG</given-names>
            </name>
            <name name-style="western">
              <surname>Rosa</surname>
              <given-names>RL</given-names>
            </name>
            <name name-style="western">
              <surname>De Gaetano</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Rodriguez</surname>
              <given-names>DZ</given-names>
            </name>
            <name name-style="western">
              <surname>Bressan</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Age groups classification in social network using deep learning</article-title>
          <source>IEEE Access</source>
          <year>2017</year>
          <month>05</month>
          <day>23</day>
          <volume>5</volume>
          <fpage>10805</fpage>
          <lpage>16</lpage>
          <pub-id pub-id-type="doi">10.1109/access.2017.2706674</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref74">
        <label>74</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hasanuzzaman</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Dias</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Way</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Demographic word embeddings for racism detection on Twitter</article-title>
          <source>Proceedings of the Eighth International Joint Conference on Natural Language Processing (Volume 1: Long Papers)</source>
          <conf-name>IJCNLP 2017</conf-name>
          <conf-date>November 28-30, 2017</conf-date>
          <conf-loc>Taipei, Taiwan</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref75">
        <label>75</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hashempour</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>A deep learning approach to language-independent gender prediction on Twitter</article-title>
          <source>Proceedings of the 2019 Workshop on Widening NLP</source>
          <conf-name>2019 Workshop on Widening NLP</conf-name>
          <conf-date>July 28, 2019</conf-date>
          <conf-loc>Florence, Italy</conf-loc>
          <pub-id pub-id-type="doi">10.18653/v1/w17-2901</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref76">
        <label>76</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hirt</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Kühl</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Satzger</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Cognitive computing for customer profiling: meta classification for gender prediction</article-title>
          <source>Electron Mark</source>
          <year>2019</year>
          <month>02</month>
          <day>21</day>
          <volume>29</volume>
          <fpage>93</fpage>
          <lpage>106</lpage>
          <pub-id pub-id-type="doi">10.1007/s12525-019-00336-z</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref77">
        <label>77</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Xing</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Dernoncourt</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Paul</surname>
              <given-names>MJ</given-names>
            </name>
          </person-group>
          <article-title>Multilingual Twitter corpus and baselines for evaluating demographic bias in hate speech recognition</article-title>
          <source>Proceedings of the Twelfth Language Resources and Evaluation Conference</source>
          <conf-name>LREC 2020</conf-name>
          <conf-date>May 11-16, 2020</conf-date>
          <conf-loc>Marseille, France</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref78">
        <label>78</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Smith</surname>
              <given-names>MC</given-names>
            </name>
            <name name-style="western">
              <surname>Jamison</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Broniatowski</surname>
              <given-names>DA</given-names>
            </name>
            <name name-style="western">
              <surname>Dredze</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Quinn</surname>
              <given-names>SC</given-names>
            </name>
            <name name-style="western">
              <surname>Cai</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Paul</surname>
              <given-names>MJ</given-names>
            </name>
          </person-group>
          <article-title>Can online self-reports assist in real-time identification of influenza vaccination uptake? A cross-sectional study of influenza vaccine-related tweets in the USA, 2013-2017</article-title>
          <source>BMJ Open</source>
          <year>2019</year>
          <month>01</month>
          <day>15</day>
          <volume>9</volume>
          <issue>1</issue>
          <fpage>e024018</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmjopen.bmj.com/lookup/pmidlookup?view=long&#38;pmid=30647040"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/bmjopen-2018-024018</pub-id>
          <pub-id pub-id-type="medline">30647040</pub-id>
          <pub-id pub-id-type="pii">bmjopen-2018-024018</pub-id>
          <pub-id pub-id-type="pmcid">PMC6340631</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref79">
        <label>79</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hussein</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Farouk</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Hemayed</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Gender identification of Egyptian dialect in twitter</article-title>
          <source>Egypt Inform J</source>
          <year>2019</year>
          <month>07</month>
          <volume>20</volume>
          <issue>2</issue>
          <fpage>109</fpage>
          <lpage>16</lpage>
          <pub-id pub-id-type="doi">10.1016/j.eij.2018.12.002</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref80">
        <label>80</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jurgens</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Tsvetkov</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Jurafsky</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Writer profiling without the writer’s text</article-title>
          <source>Proceedings of the 9th International Conference, SocInfo 2017</source>
          <conf-name>SocInfo 2017</conf-name>
          <conf-date>September 13-15, 2017</conf-date>
          <conf-loc>Oxford, UK</conf-loc>
          <pub-id pub-id-type="doi">10.1007/978-3-319-67256-4_43</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref81">
        <label>81</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>The public’s opinions on a new school meals policy for childhood obesity prevention in the U.S.: a social media analytics approach</article-title>
          <source>Int J Med Inform</source>
          <year>2017</year>
          <month>07</month>
          <volume>103</volume>
          <fpage>83</fpage>
          <lpage>8</lpage>
          <pub-id pub-id-type="doi">10.1016/j.ijmedinf.2017.04.013</pub-id>
          <pub-id pub-id-type="medline">28551006</pub-id>
          <pub-id pub-id-type="pii">S1386-5056(17)30089-8</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref82">
        <label>82</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Khandelwal</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Swami</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Akhtar</surname>
              <given-names>SS</given-names>
            </name>
            <name name-style="western">
              <surname>Shrivastava</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Gender prediction in English-Hindi code-mixed social media content: corpus and baseline system</article-title>
          <source>Computacion y Sistemas</source>
          <year>2018</year>
          <volume>22</volume>
          <issue>4</issue>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.researchgate.net/publication/332549453_Gender_Prediction_in_English-Hindi_Code-Mixed_Social_Media_Content_Corpus_and_Baseline_System"/>
          </comment>
          <pub-id pub-id-type="doi">10.13053/cys-22-4-3061</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref83">
        <label>83</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>SM</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Qu</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Wan</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Paris</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Demographic inference on Twitter using recursive neural networks</article-title>
          <source>Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)</source>
          <conf-name>ACL 2017</conf-name>
          <conf-date>July 30-August 4, 2017</conf-date>
          <conf-loc>Vancouver, Canada</conf-loc>
          <pub-id pub-id-type="doi">10.18653/v1/p17-2075</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref84">
        <label>84</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kostakos</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Pandya</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Kyriakouli</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Oussalah</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Inferring demographic data of marginalized users in Twitter with computer vision APIs</article-title>
          <source>Proceedings of the European Intelligence and Security Informatics Conference (EISIC)</source>
          <conf-name>EISIC 2018</conf-name>
          <conf-date>October 24-25, 2018</conf-date>
          <conf-loc>Karlskrona, Sweden</conf-loc>
          <pub-id pub-id-type="doi">10.1109/eisic.2018.00022</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref85">
        <label>85</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ljubešić</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Fišer</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Erjavec</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Language-independent gender prediction on Twitter</article-title>
          <source>Proceedings of the Second Workshop on NLP and Computational Social Science</source>
          <conf-name>NLP+CSS 2017</conf-name>
          <conf-date>August 3, 2017</conf-date>
          <conf-loc>Vancouver, Canada</conf-loc>
          <pub-id pub-id-type="doi">10.18653/v1/W17-2901</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref86">
        <label>86</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>López-Monroy</surname>
              <given-names>AP</given-names>
            </name>
            <name name-style="western">
              <surname>González</surname>
              <given-names>FA</given-names>
            </name>
            <name name-style="western">
              <surname>Solorio</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Early author profiling on Twitter using profile features with multi-resolution</article-title>
          <source>Expert Syst Appl</source>
          <year>2020</year>
          <month>02</month>
          <volume>140</volume>
          <fpage>112909</fpage>
          <pub-id pub-id-type="doi">10.1016/j.eswa.2019.112909</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref87">
        <label>87</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Markov</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Gómez-Adorno</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Posadas-Durán</surname>
              <given-names>JP</given-names>
            </name>
            <name name-style="western">
              <surname>Sidorov</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Gelbukh</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Author profiling with Doc2vec neural network-based document embeddings</article-title>
          <source>Proceedings of the 15th Mexican International Conference on Artificial Intelligence</source>
          <conf-name>MICAI 2016</conf-name>
          <conf-date>October 23-28, 2016</conf-date>
          <conf-loc>Cancún, Mexico</conf-loc>
          <pub-id pub-id-type="doi">10.1007/978-3-319-62428-0_9</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref88">
        <label>88</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Messias</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Vikatos</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Benevenuto</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>White, man, and highly followed: gender and race inequalities in Twitter</article-title>
          <source>Proceedings of the International Conference on Web Intelligence</source>
          <month>08</month>
          <conf-name>WI '17</conf-name>
          <conf-date>August 23-26, 2017</conf-date>
          <conf-loc>Leipzig, Germany</conf-loc>
          <pub-id pub-id-type="doi">10.1145/3106426.3106472</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref89">
        <label>89</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Miura</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Hirota</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Kato</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Araki</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Endo</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Ishikawa</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Predicting user gender on social media sites using geographical information</article-title>
          <source>Proceedings of the 10th International Conference on Management of Digital EcoSystems</source>
          <month>09</month>
          <conf-name>MEDES '18</conf-name>
          <conf-date>September 25-28, 2018</conf-date>
          <conf-loc>Tokyo, Japan</conf-loc>
          <pub-id pub-id-type="doi">10.1145/3281375.3281383</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref90">
        <label>90</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Morgan-Lopez</surname>
              <given-names>AA</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>AE</given-names>
            </name>
            <name name-style="western">
              <surname>Chew</surname>
              <given-names>RF</given-names>
            </name>
            <name name-style="western">
              <surname>Ruddle</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Predicting age groups of Twitter users based on language and metadata features</article-title>
          <source>PLoS One</source>
          <year>2017</year>
          <volume>12</volume>
          <issue>8</issue>
          <fpage>e0183537</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pone.0183537"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0183537</pub-id>
          <pub-id pub-id-type="medline">28850620</pub-id>
          <pub-id pub-id-type="pii">PONE-D-16-41961</pub-id>
          <pub-id pub-id-type="pmcid">PMC5574558</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref91">
        <label>91</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mueller</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Wood-Doughty</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Amir</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Dredze</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Nobles</surname>
              <given-names>AL</given-names>
            </name>
          </person-group>
          <article-title>Demographic representation and collective storytelling in the me too Twitter hashtag activism movement</article-title>
          <source>Proc ACM Hum Comput Interact</source>
          <year>2021</year>
          <month>04</month>
          <day>22</day>
          <volume>5</volume>
          <issue>CSCW1</issue>
          <fpage>1</fpage>
          <lpage>28</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/35295189"/>
          </comment>
          <pub-id pub-id-type="doi">10.1145/3449181</pub-id>
          <pub-id pub-id-type="medline">35295189</pub-id>
          <pub-id pub-id-type="pii">107</pub-id>
          <pub-id pub-id-type="pmcid">PMC8920314</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref92">
        <label>92</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mukherjee</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Bala</surname>
              <given-names>PK</given-names>
            </name>
          </person-group>
          <article-title>Gender classification of microblog text based on authorial style</article-title>
          <source>Inf Syst E Bus Manage</source>
          <year>2016</year>
          <month>03</month>
          <day>02</day>
          <volume>15</volume>
          <issue>1</issue>
          <fpage>117</fpage>
          <lpage>38</lpage>
          <pub-id pub-id-type="doi">10.1007/s10257-016-0312-0</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref93">
        <label>93</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Imuede</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Raborife</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Ranchod</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Sentiment analysis as an indicator to evaluate gender disparity on sexual violence tweets in South Africa</article-title>
          <source>Proceedings of the International SAUPEC/RobMech/PRASA Conference</source>
          <conf-name>International SAUPEC/RobMech/PRASA Conference</conf-name>
          <conf-date>January 29-31, 2020</conf-date>
          <conf-loc>Cape Town, South Africa</conf-loc>
          <pub-id pub-id-type="doi">10.1109/saupec/robmech/prasa48453.2020.9040955</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref94">
        <label>94</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pandya</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Oussalah</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Monachesi</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Kostakos</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Loven</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>On the use of URLs and hashtags in age prediction of Twitter users</article-title>
          <source>Proceedings of the IEEE International Conference on Information Reuse and Integration (IRI)</source>
          <conf-name>IEEE IRI 2018</conf-name>
          <conf-date>July 6-9, 2018</conf-date>
          <conf-loc>Salt Lake City, UT</conf-loc>
          <pub-id pub-id-type="doi">10.1109/iri.2018.00017</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref95">
        <label>95</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pandya</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Oussalah</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Monachesi</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Kostakos</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>On the use of distributed semantics of tweet metadata for user age prediction</article-title>
          <source>Future Gener Comput Syst</source>
          <year>2020</year>
          <month>01</month>
          <volume>102</volume>
          <fpage>437</fpage>
          <lpage>52</lpage>
          <pub-id pub-id-type="doi">10.1016/j.future.2019.08.018</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref96">
        <label>96</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pizarro</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Profiling bots and fake news spreaders at PAN’19 and PAN’20: bots and gender profiling 2019, profiling fake news spreaders on Twitter 2020</article-title>
          <source>Proceedings of the IEEE 7th International Conference on Data Science and Advanced Analytics (DSAA)</source>
          <conf-name>DSAA 2020</conf-name>
          <conf-date>October 6-9, 2020</conf-date>
          <conf-loc>Sydney, Australia</conf-loc>
          <pub-id pub-id-type="doi">10.1109/dsaa49011.2020.00088</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref97">
        <label>97</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Reis</surname>
              <given-names>JC</given-names>
            </name>
            <name name-style="western">
              <surname>Kwak</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>An</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Messias</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Benevenuto</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Demographics of news sharing in the U.S. Twittersphere</article-title>
          <source>Proceedings of the 28th ACM Conference on Hypertext and Social Media</source>
          <conf-name>HT'17</conf-name>
          <conf-date>July 4-7, 2017</conf-date>
          <conf-loc>Prague, Czech Republic</conf-loc>
          <pub-id pub-id-type="doi">10.1145/3078714.3078734</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref98">
        <label>98</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Serfass</surname>
              <given-names>DG</given-names>
            </name>
          </person-group>
          <article-title>Assessing situations on social media: temporal, demographic, and personality influences on situation experience</article-title>
          <source>Florida Atlantic University</source>
          <year>2016</year>
          <access-date>2023-03-30</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.proquest.com/openview/6a5ca1b98806d5f3a2ce7e66e2f358cd/1?pq-origsite=gscholar&#38;cbl=18750">https://www.proquest.com/openview/6a5ca1b98806d5f3a2ce7e66e2f358cd/1?pq-origsite=gscholar&#38;cbl=18750</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref99">
        <label>99</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Stevens</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Bonett</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Bannon</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chittamuru</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Slaff</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Browne</surname>
              <given-names>SK</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Bauermeister</surname>
              <given-names>JA</given-names>
            </name>
          </person-group>
          <article-title>Association between HIV-related Tweets and HIV incidence in the United States: infodemiology study</article-title>
          <source>J Med Internet Res</source>
          <year>2020</year>
          <month>06</month>
          <day>24</day>
          <volume>22</volume>
          <issue>6</issue>
          <fpage>e17196</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2020/6/e17196/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/17196</pub-id>
          <pub-id pub-id-type="medline">32579119</pub-id>
          <pub-id pub-id-type="pii">v22i6e17196</pub-id>
          <pub-id pub-id-type="pmcid">PMC7380998</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref100">
        <label>100</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Stevens</surname>
              <given-names>RC</given-names>
            </name>
            <name name-style="western">
              <surname>Brawner</surname>
              <given-names>BM</given-names>
            </name>
            <name name-style="western">
              <surname>Kranzler</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Giorgi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Lazarus</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Abera</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Ungar</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Exploring substance use Tweets of youth in the United States: mixed methods study</article-title>
          <source>JMIR Public Health Surveill</source>
          <year>2020</year>
          <month>03</month>
          <day>26</day>
          <volume>6</volume>
          <issue>1</issue>
          <fpage>e16191</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://publichealth.jmir.org/2020/1/e16191/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/16191</pub-id>
          <pub-id pub-id-type="medline">32213472</pub-id>
          <pub-id pub-id-type="pii">v6i1e16191</pub-id>
          <pub-id pub-id-type="pmcid">PMC7146240</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref101">
        <label>101</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Swain</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Seeja</surname>
              <given-names>KR</given-names>
            </name>
          </person-group>
          <article-title>TWEESENT: a web application on sentiment analysis</article-title>
          <source>Proceedings of the Second International Conference on Smart Innovations in Communications and Computational Sciences</source>
          <year>2019</year>
          <conf-name>ICSICCS-2018</conf-name>
          <conf-date>April 28-29, 2018</conf-date>
          <conf-loc>Indore, India</conf-loc>
          <pub-id pub-id-type="doi">10.1007/978-981-13-2414-7_36</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref102">
        <label>102</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Thelwall</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Thelwall</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Covid-19 tweeting in English: gender differences</article-title>
          <source>Prof De La Inf</source>
          <year>2020</year>
          <month>05</month>
          <day>04</day>
          <volume>29</volume>
          <issue>3</issue>
          <pub-id pub-id-type="doi">10.3145/epi.2020.may.01</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref103">
        <label>103</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Udayakumar</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Senadeera</surname>
              <given-names>DC</given-names>
            </name>
            <name name-style="western">
              <surname>Yamunarani</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Cheon</surname>
              <given-names>NJ</given-names>
            </name>
          </person-group>
          <article-title>Demographics analysis of Twitter users who tweeted on psychological articles and tweets analysis</article-title>
          <source>Procedia Comput Sci</source>
          <year>2018</year>
          <volume>144</volume>
          <fpage>96</fpage>
          <lpage>104</lpage>
          <pub-id pub-id-type="doi">10.1016/j.procs.2018.10.509</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref104">
        <label>104</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>van der Goot</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Ljubešić</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Matroos</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Nissim</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Plank</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Bleaching text: abstract features for cross-lingual gender prediction</article-title>
          <source>Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)</source>
          <conf-name>ACL 2018</conf-name>
          <conf-date>July 15-20, 2018</conf-date>
          <conf-loc>Melbourne, Australia</conf-loc>
          <pub-id pub-id-type="doi">10.18653/v1/p18-2061</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref105">
        <label>105</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Vashisth</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Meehan</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Gender classification using Twitter text data</article-title>
          <source>Proceedings of the 31st Irish Signals and Systems Conference (ISSC)</source>
          <conf-name>ISSC 2020</conf-name>
          <conf-date>June 11-12, 2020</conf-date>
          <conf-loc>Letterkenny, Ireland</conf-loc>
          <pub-id pub-id-type="doi">10.1109/issc49989.2020.9180161</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref106">
        <label>106</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Verhoeven</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Škrjanec</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Pollak</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Gender profiling for Slovene Twitter communication: the influence of gender marking, content and style</article-title>
          <source>Proceedings of the 6th Workshop on Balto-Slavic Natural Language Processing</source>
          <conf-name>BSNLP 2017</conf-name>
          <conf-date>April 4, 2017</conf-date>
          <conf-loc>Valencia, Spain</conf-loc>
          <pub-id pub-id-type="doi">10.18653/v1/w17-1418</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref107">
        <label>107</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Vicente</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Batista</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Carvalho</surname>
              <given-names>JP</given-names>
            </name>
          </person-group>
          <person-group person-group-type="editor">
            <name name-style="western">
              <surname>Kóczy</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Medina-Moreno</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Ramírez-Poussa</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Gender detection of Twitter users based on multiple information sources</article-title>
          <source>Interactions Between Computational Intelligence and Mathematics Part 2</source>
          <year>2018</year>
          <month>11</month>
          <day>03</day>
          <publisher-loc>Cham, Switzerland</publisher-loc>
          <publisher-name>Springer</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref108">
        <label>108</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Vijayaraghavan</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Vosoughi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Roy</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Twitter demographic classification using deep multi-modal multi-task learning</article-title>
          <source>Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)</source>
          <conf-name>ACL 2017</conf-name>
          <conf-date>July 30-August 4, 2017</conf-date>
          <conf-loc>Vancouver, Canada</conf-loc>
          <pub-id pub-id-type="doi">10.18653/v1/p17-2076</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref109">
        <label>109</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Vikatos</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Messias</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Miranda</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Benevenuto</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Linguistic diversities of demographic groups in Twitter</article-title>
          <source>Proceedings of the 28th ACM Conference on Hypertext and Social Media</source>
          <conf-name>HT'17</conf-name>
          <conf-date>July 4-7, 2017</conf-date>
          <conf-loc>Prague, Czech Republic</conf-loc>
          <pub-id pub-id-type="doi">10.1145/3078714.3078742</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref110">
        <label>110</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Volkova</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Predicting demographics and affect in social networks</article-title>
          <source>The Johns Hopkins University</source>
          <year>2015</year>
          <month>10</month>
          <access-date>2023-03-30</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://jscholarship.library.jhu.edu/bitstream/handle/1774.2/39639/VOLKOVA-DISSERTATION-2015.pdf?sequence=1&#38;isAllowed=y">https://jscholarship.library.jhu.edu/bitstream/handle/1774.2/39639/VOLKOVA-DISSERTATION-2015.pdf?sequence=1&#38;isAllowed=y</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref111">
        <label>111</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Feng</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Luo</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Gender politics in the 2016 U.S. Presidential election: a computer vision approach</article-title>
          <source>Proceedings of the 10th International Conference, SBP-BRiMS 2017</source>
          <conf-name>SBP-BRiMS 2017</conf-name>
          <conf-date>July 5-8, 2017</conf-date>
          <conf-loc>Washington, DC</conf-loc>
          <pub-id pub-id-type="doi">10.1007/978-3-319-60240-0_4</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref112">
        <label>112</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Hale</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Adelani</surname>
              <given-names>DI</given-names>
            </name>
            <name name-style="western">
              <surname>Grabowicz</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Hartman</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Flöck</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Jurgens</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Demographic inference and representative population estimates from multilingual social media data</article-title>
          <source>Proceedings of the World Wide Web Conference</source>
          <conf-name>WWW '19</conf-name>
          <conf-date>May 13-17, 2019</conf-date>
          <conf-loc>San Francisco, CA</conf-loc>
          <pub-id pub-id-type="doi">10.1145/3308558.3313684</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref113">
        <label>113</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wong</surname>
              <given-names>SC</given-names>
            </name>
            <name name-style="western">
              <surname>Teh</surname>
              <given-names>PL</given-names>
            </name>
            <name name-style="western">
              <surname>Cheng</surname>
              <given-names>CB</given-names>
            </name>
          </person-group>
          <article-title>How different genders use profanity on Twitter?</article-title>
          <source>Proceedings of the 2020 the 4th International Conference on Compute and Data Analysis</source>
          <conf-name>ICCDA 2020</conf-name>
          <conf-date>March 9-12, 2020</conf-date>
          <conf-loc>Silicon Valley, CA</conf-loc>
          <pub-id pub-id-type="doi">10.1145/3388142.3388145</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref114">
        <label>114</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wood-Doughty</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Smith</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Broniatowski</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Dredze</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>How does Twitter user behavior vary across demographic groups?</article-title>
          <source>Proceedings of the Second Workshop on NLP and Computational Social Science</source>
          <conf-name>NLP+CSS 2017</conf-name>
          <conf-date>August 3, 2017</conf-date>
          <conf-loc>Vancouver, Canada</conf-loc>
          <pub-id pub-id-type="doi">10.18653/v1/w17-2912</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref115">
        <label>115</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wood-Doughty</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Andrews</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Marvin</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Dredze</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Predicting Twitter user demographics from names alone</article-title>
          <source>Proceedings of the Second Workshop on Computational Modeling of People’s Opinions, Personality, and Emotions in Social Media</source>
          <conf-name>PEOPLES 2018</conf-name>
          <conf-date>June 6, 2018</conf-date>
          <conf-loc>New Orleans, LA</conf-loc>
          <pub-id pub-id-type="doi">10.18653/v1/w18-1114</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref116">
        <label>116</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Xiang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Sang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Demographic attribute inference from social multimedia behaviors: a cross-OSN approach</article-title>
          <source>Proceedings of the 23rd International Conference, MMM 2017</source>
          <conf-name>MMM 2017</conf-name>
          <conf-date>January 4-6, 2017</conf-date>
          <conf-loc>Reykjavik, Iceland</conf-loc>
          <pub-id pub-id-type="doi">10.1007/978-3-319-51811-4_42</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref117">
        <label>117</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>YC</given-names>
            </name>
            <name name-style="western">
              <surname>Al-Garadi</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Love</surname>
              <given-names>JS</given-names>
            </name>
            <name name-style="western">
              <surname>Perrone</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Sarker</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Automatic gender detection in Twitter profiles for health-related cohort studies</article-title>
          <source>JAMIA Open</source>
          <year>2021</year>
          <month>06</month>
          <day>23</day>
          <volume>4</volume>
          <issue>2</issue>
          <fpage>ooab042</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/34169232"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamiaopen/ooab042</pub-id>
          <pub-id pub-id-type="medline">34169232</pub-id>
          <pub-id pub-id-type="pii">ooab042</pub-id>
          <pub-id pub-id-type="pmcid">PMC8220305</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref118">
        <label>118</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yazdavar</surname>
              <given-names>AH</given-names>
            </name>
            <name name-style="western">
              <surname>Mahdavinejad</surname>
              <given-names>MS</given-names>
            </name>
            <name name-style="western">
              <surname>Bajaj</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Romine</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Sheth</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Monadjemi</surname>
              <given-names>AH</given-names>
            </name>
            <name name-style="western">
              <surname>Thirunarayan</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Meddar</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Myers</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Pathak</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Hitzler</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Multimodal mental health analysis in social media</article-title>
          <source>PLoS One</source>
          <year>2020</year>
          <month>04</month>
          <day>10</day>
          <volume>15</volume>
          <issue>4</issue>
          <fpage>e0226248</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pone.0226248"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0226248</pub-id>
          <pub-id pub-id-type="medline">32275658</pub-id>
          <pub-id pub-id-type="pii">PONE-D-19-03990</pub-id>
          <pub-id pub-id-type="pmcid">PMC7147779</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref119">
        <label>119</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yildiz</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Munson</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Vitali</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Tinati</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Holland</surname>
              <given-names>JA</given-names>
            </name>
          </person-group>
          <article-title>Using Twitter data for demographic research</article-title>
          <source>Demogr Res</source>
          <year>2017</year>
          <month>11</month>
          <day>22</day>
          <volume>37</volume>
          <fpage>1477</fpage>
          <lpage>514</lpage>
          <pub-id pub-id-type="doi">10.4054/demres.2017.37.46</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref120">
        <label>120</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Understanding concerns, sentiments, and disparities among population groups during the COVID-19 pandemic via Twitter data mining: large-scale cross-sectional study</article-title>
          <source>J Med Internet Res</source>
          <year>2021</year>
          <month>03</month>
          <day>05</day>
          <volume>23</volume>
          <issue>3</issue>
          <fpage>e26482</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2021/3/e26482/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/26482</pub-id>
          <pub-id pub-id-type="medline">33617460</pub-id>
          <pub-id pub-id-type="pii">v23i3e26482</pub-id>
          <pub-id pub-id-type="pmcid">PMC7939057</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref121">
        <label>121</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Huo</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Guo</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Prosperi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Bian</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Mining Twitter to assess the determinants of health behavior towards palliative care in the United States</article-title>
          <source>AMIA Jt Summits Transl Sci Proc</source>
          <year>2020</year>
          <fpage>730</fpage>
          <lpage>9</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/32477696"/>
          </comment>
          <pub-id pub-id-type="medline">32477696</pub-id>
          <pub-id pub-id-type="pmcid">PMC7233059</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref122">
        <label>122</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cesare</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Grant</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Hawkins</surname>
              <given-names>JB</given-names>
            </name>
            <name name-style="western">
              <surname>Brownstein</surname>
              <given-names>JS</given-names>
            </name>
            <name name-style="western">
              <surname>Nsoesie</surname>
              <given-names>EO</given-names>
            </name>
          </person-group>
          <article-title>Demographics in social media data for public health research: does it matter?</article-title>
          <source>arXiv. Preprint posted online October 30, 2017</source>
          <year>2023</year>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1710.11048"/>
          </comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.1710.11048</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref123">
        <label>123</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rangel</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Rosso</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Koppel</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Stamatatos</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Inches</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Overview of the author profiling task at PAN 2013</article-title>
          <source>Proceedings of the CLEF Conference on Multilingual and Multimodal Information Access Evaluation</source>
          <conf-name>CLEF 2013</conf-name>
          <conf-date>September 23-26, 2013</conf-date>
          <conf-loc>Valencia, Spain</conf-loc>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://riunet.upv.es/handle/10251/46636"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref124">
        <label>124</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rangel</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Rosso</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Chugur</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Potthast</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Trenkmann</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Stein</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Verhoeven</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Daelemans</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>Overview of the 2nd author profiling task at PAN 2014</article-title>
          <source>Proceedings of the 2014 Computer Science Workshops</source>
          <conf-name>CEUR-WS '14</conf-name>
          <conf-date>March 18, 2014</conf-date>
          <conf-loc>Sheffield, UK</conf-loc>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://ceur-ws.org/Vol-1180/CLEF2014wn-Pan-RangelEt2014.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref125">
        <label>125</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rangel</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Celli</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Rosso</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Potthast</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Stein</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Daelemans</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>Overview of the 3rd author profiling task at PAN 2015</article-title>
          <source>Proceedings of the CLEF 2015 Conference and Labs of the Evaluation Forum</source>
          <conf-name>CLEF 2015</conf-name>
          <conf-date>September 8-11, 2015</conf-date>
          <conf-loc>Toulouse, France</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref126">
        <label>126</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rangel</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Rosso</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Verhoeven</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Daelemans</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Potthast</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Stein</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Overview of the 4th author profiling task at PAN 2016: cross-genre evaluations</article-title>
          <source>Proceedings of the Conference and Labs of the Evaluation Forum</source>
          <conf-name>CLEF 2016</conf-name>
          <conf-date>September 5-8, 2016</conf-date>
          <conf-loc>Evora, Portugal</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref127">
        <label>127</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rangel</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Rosso</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Potthast</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Stein</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Overview of the 5th author profiling task at PAN 2017: gender and language variety identification in Twitter</article-title>
          <source>Proceedings of the Conference and Labs of the Evaluation Forum</source>
          <conf-name>CLEF 2017</conf-name>
          <conf-date>September 11-14, 2017</conf-date>
          <conf-loc>Dublin, Ireland</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref128">
        <label>128</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rangel</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Rosso</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Montes-y-Gómez</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Potthast</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Stein</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Overview of the 6th author profiling task at PAN 2018: multimodal gender identification in Twitter</article-title>
          <source>Proceedings of the Conference and Labs of the Evaluation Forum</source>
          <conf-name>CLEF 2018</conf-name>
          <conf-date>September 10-14, 2018</conf-date>
          <conf-loc>Avignon, France</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref129">
        <label>129</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rangel</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Rosso</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Overview of the 7th author profiling task at PAN 2019: bots and gender profiling in Twitter</article-title>
          <source>Proceedings of the Conference and Labs of the Evaluation Forum</source>
          <conf-name>CLEF 2019</conf-name>
          <conf-date>September 9-12, 2019</conf-date>
          <conf-loc>Lugano, Switzerland</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref130">
        <label>130</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Burger</surname>
              <given-names>JD</given-names>
            </name>
            <name name-style="western">
              <surname>Henderson</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Zarrella</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Discriminating gender on Twitter</article-title>
          <source>Proceedings of the 2011 Conference on Empirical Methods in Natural Language Processing</source>
          <conf-name>EMNLP 2011</conf-name>
          <conf-date>July 27-31, 2011</conf-date>
          <conf-loc>Edinburgh, UK</conf-loc>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/D11-1120/"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref131">
        <label>131</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Volkova</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Yarowsky</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Improving gender prediction of social media users via weighted annotator rationales</article-title>
          <source>Johns Hopkins University</source>
          <access-date>2023-03-30</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://hltcoe.jhu.edu/wp-content/uploads/2016/11/17310_slides.pdf">https://hltcoe.jhu.edu/wp-content/uploads/2016/11/17310_slides.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref132">
        <label>132</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Volkova</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Wilson</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Yarowsky</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Exploring demographic language variations to improve multilingual sentiment analysis in social media</article-title>
          <source>Proceedings of the 2013 Conference on Empirical Methods in Natural Language Processing</source>
          <conf-name>EMNLP 2013</conf-name>
          <conf-date>October 18-21, 2013</conf-date>
          <conf-loc>Seattle, WA</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref133">
        <label>133</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Ruths</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>What’s in a name? Using first names as features for gender inference in Twitter</article-title>
          <source>Proceedings of the AAAI 2013 Spring Symposium Series</source>
          <conf-name>AAAI 2013 Spring Symposium Series</conf-name>
          <conf-date>March 25-27, 2013</conf-date>
          <conf-loc>Palo Alto, CA</conf-loc>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aaai.org/papers/05744-whats-in-a-name-using-first-names-as-features-for-gender-inference-in-twitter/"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref134">
        <label>134</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Plank</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Hovy</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Personality traits on Twitter—or—how to get 1,500 personality tests in a week</article-title>
          <source>Proceedings of the 6th Workshop on Computational Approaches to Subjectivity, Sentiment and Social Media Analysis</source>
          <conf-name>WASSA 2015</conf-name>
          <conf-date>September 17, 2015</conf-date>
          <conf-loc>Lisboa, Portugal</conf-loc>
          <pub-id pub-id-type="doi">10.18653/v1/w15-2913</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref135">
        <label>135</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Verhoeven</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Daelemans</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Plank</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>TwiSty: a multilingual Twitter stylometry corpus for gender and personality profiling</article-title>
          <source>Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC'16)</source>
          <conf-name>LREC'16</conf-name>
          <conf-date>May 23-28, 2016</conf-date>
          <conf-loc>Portorož, Slovenia</conf-loc>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.clips.uantwerpen.be/"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref136">
        <label>136</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chauhan</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Gender classification dataset</article-title>
          <source>Kaggle</source>
          <access-date>2022-10-17</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.kaggle.com/datasets/cashutosh/gender-classification-dataset">https://www.kaggle.com/datasets/cashutosh/gender-classification-dataset</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref137">
        <label>137</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Radford</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Piloting a theory-based approach to inferring gender in big data</article-title>
          <source>Proceedings of the IEEE International Conference on Big Data (Big Data)</source>
          <conf-name>IEEE BigData 2017</conf-name>
          <conf-date>December 11-14, 2017</conf-date>
          <conf-loc>Boston, MA</conf-loc>
          <pub-id pub-id-type="doi">10.1109/bigdata.2017.8258555</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref138">
        <label>138</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pizarro</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Using N-grams to detect bots on Twitter: notebook for PAN at CLEF 2019</article-title>
          <source>Proceedings of the Conference and Labs of the Evaluation Forum</source>
          <conf-name>CLEF 2019</conf-name>
          <conf-date>September 9-12, 2019</conf-date>
          <conf-loc>Lugano, Switzerland</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref139">
        <label>139</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Knowles</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Carroll</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Dredze</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Demographer: extremely simple name demographics</article-title>
          <source>Proceedings of the First Workshop on NLP and Computational Social Science</source>
          <conf-name>NLP+CSS 2016</conf-name>
          <conf-date>November 5, 2016</conf-date>
          <conf-loc>Austin, TX</conf-loc>
          <pub-id pub-id-type="doi">10.18653/v1/w16-5614</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref140">
        <label>140</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Volkova</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Coppersmith</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Van Durme</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Inferring user political preferences from streaming communications</article-title>
          <source>Proceedings of the 52nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</source>
          <conf-name>ACL 2014</conf-name>
          <conf-date>June 22-27, 2014</conf-date>
          <conf-loc>Baltimore, MD</conf-loc>
          <pub-id pub-id-type="doi">10.3115/v1/p14-1018</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref141">
        <label>141</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sap</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Park</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Eichstaedt</surname>
              <given-names>JC</given-names>
            </name>
            <name name-style="western">
              <surname>Kern</surname>
              <given-names>ML</given-names>
            </name>
            <name name-style="western">
              <surname>Stillwell</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Kosinski</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Ungar</surname>
              <given-names>LH</given-names>
            </name>
            <name name-style="western">
              <surname>Schwartz</surname>
              <given-names>HA</given-names>
            </name>
          </person-group>
          <article-title>Developing age and gender predictive lexica over social media</article-title>
          <source>Proceedings of the 2014 Conference on Empirical Methods in Natural Language Processing (EMNLP)</source>
          <conf-name>EMNLP 2014</conf-name>
          <conf-date>October 25-29, 2014</conf-date>
          <conf-loc>Doha, Qatar</conf-loc>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/D14-1121.pdf"/>
          </comment>
          <pub-id pub-id-type="doi">10.3115/v1/d14-1121</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref142">
        <label>142</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Fan</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Cao</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Yin</surname>
              <given-names>Q</given-names>
            </name>
          </person-group>
          <article-title>Extensive facial landmark localization with coarse-to-fine convolutional network cascade</article-title>
          <source>Proceedings of the IEEE International Conference on Computer Vision Workshops</source>
          <conf-name>ICCV 2013</conf-name>
          <conf-date>December 02-08, 2013</conf-date>
          <conf-loc>Sydney, Australia</conf-loc>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://ieeexplore.ieee.org/document/6755923"/>
          </comment>
          <pub-id pub-id-type="doi">10.1109/iccvw.2013.58</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref143">
        <label>143</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wood-Doughty</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Dredze</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Using noisy self-reports to predict Twitter user demographics</article-title>
          <source>arXiv. Preprint posted online May 1, 2020</source>
          <year>2023</year>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://arxiv.org/abs/2005.00635"/>
          </comment>
          <pub-id pub-id-type="doi">10.18653/v1/2021.socialnlp-1.11</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref144">
        <label>144</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rothe</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Timofte</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Van Gool</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Deep expectation of real and apparent age from a single image without facial landmarks</article-title>
          <source>Int J Comput Vis</source>
          <year>2016</year>
          <month>8</month>
          <day>10</day>
          <volume>126</volume>
          <issue>2-4</issue>
          <fpage>144</fpage>
          <lpage>57</lpage>
          <pub-id pub-id-type="doi">10.1007/s11263-016-0940-3</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref145">
        <label>145</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Chaudhari</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Davatzikos</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Bias in machine learning models can be significantly mitigated by careful training: evidence from neuroimaging studies</article-title>
          <source>Proc Natl Acad Sci U S A</source>
          <year>2023</year>
          <month>02</month>
          <day>07</day>
          <volume>120</volume>
          <issue>6</issue>
          <fpage>e2211613120</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.pnas.org/doi/abs/10.1073/pnas.2211613120?url_ver=Z39.88-2003&#38;rfr_id=ori:rid:crossref.org&#38;rfr_dat=cr_pub%20%200pubmed"/>
          </comment>
          <pub-id pub-id-type="doi">10.1073/pnas.2211613120</pub-id>
          <pub-id pub-id-type="medline">36716365</pub-id>
          <pub-id pub-id-type="pmcid">PMC9962919</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref146">
        <label>146</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Geifman</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Cohen</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Rubin</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Redefining meaningful age groups in the context of disease</article-title>
          <source>Age (Dordr)</source>
          <year>2013</year>
          <month>12</month>
          <volume>35</volume>
          <issue>6</issue>
          <fpage>2357</fpage>
          <lpage>66</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/23354682"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s11357-013-9510-6</pub-id>
          <pub-id pub-id-type="medline">23354682</pub-id>
          <pub-id pub-id-type="pmcid">PMC3825015</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref147">
        <label>147</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sera</surname>
              <given-names>LC</given-names>
            </name>
            <name name-style="western">
              <surname>McPherson</surname>
              <given-names>ML</given-names>
            </name>
          </person-group>
          <article-title>Pharmacokinetics and pharmacodynamic changes associated with aging and implications for drug therapy</article-title>
          <source>Clin Geriatr Med</source>
          <year>2012</year>
          <month>05</month>
          <volume>28</volume>
          <issue>2</issue>
          <fpage>273</fpage>
          <lpage>86</lpage>
          <pub-id pub-id-type="doi">10.1016/j.cger.2012.01.007</pub-id>
          <pub-id pub-id-type="medline">22500543</pub-id>
          <pub-id pub-id-type="pii">S0749-0690(12)00008-0</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref148">
        <label>148</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gonzalez-Hernandez</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Weissenbacher</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Proceedings of the seventh workshop on social media mining for health applications, workshop and shared task</article-title>
          <source>Proceedings of the Seventh Workshop on Social Media Mining for Health Applications, Workshop &#38; Shared Task</source>
          <conf-name>SMM4H '22</conf-name>
          <conf-date>October 12-17, 2022</conf-date>
          <conf-loc>Gyeongju, Republic of Korea</conf-loc>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/2022.smm4h-1.0/"/>
          </comment>
          <pub-id pub-id-type="doi">10.18653/v1/w18-5904</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref149">
        <label>149</label>
        <nlm-citation citation-type="web">
          <article-title>Distribution of Twitter users worldwide as of January, 2021, by gender</article-title>
          <source>Statista</source>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.statista.com/statistics/828092/distribution-of-users-on-twitter-worldwide-gender/">https://www.statista.com/statistics/828092/distribution-of-users-on-twitter-worldwide-gender/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref150">
        <label>150</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mauvais-Jarvis</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Bairey Merz</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Barnes</surname>
              <given-names>PJ</given-names>
            </name>
            <name name-style="western">
              <surname>Brinton</surname>
              <given-names>RD</given-names>
            </name>
            <name name-style="western">
              <surname>Carrero</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>DeMeo</surname>
              <given-names>DL</given-names>
            </name>
            <name name-style="western">
              <surname>De Vries</surname>
              <given-names>GJ</given-names>
            </name>
            <name name-style="western">
              <surname>Epperson</surname>
              <given-names>CN</given-names>
            </name>
            <name name-style="western">
              <surname>Govindan</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Klein</surname>
              <given-names>SL</given-names>
            </name>
            <name name-style="western">
              <surname>Lonardo</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Maki</surname>
              <given-names>PM</given-names>
            </name>
            <name name-style="western">
              <surname>McCullough</surname>
              <given-names>LD</given-names>
            </name>
            <name name-style="western">
              <surname>Regitz-Zagrosek</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Regensteiner</surname>
              <given-names>JG</given-names>
            </name>
            <name name-style="western">
              <surname>Rubin</surname>
              <given-names>JB</given-names>
            </name>
            <name name-style="western">
              <surname>Sandberg</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Suzuki</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Sex and gender: modifiers of health, disease, and medicine</article-title>
          <source>Lancet</source>
          <year>2020</year>
          <month>08</month>
          <volume>396</volume>
          <issue>10250</issue>
          <fpage>565</fpage>
          <lpage>82</lpage>
          <pub-id pub-id-type="doi">10.1016/s0140-6736(20)31561-0</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref151">
        <label>151</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Klein</surname>
              <given-names>AZ</given-names>
            </name>
            <name name-style="western">
              <surname>Magge</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Gonzalez-Hernandez</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>ReportAGE: automatically extracting the exact age of Twitter users based on self-reports in tweets</article-title>
          <source>PLoS One</source>
          <year>2022</year>
          <volume>17</volume>
          <issue>1</issue>
          <fpage>e0262087</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pone.0262087"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0262087</pub-id>
          <pub-id pub-id-type="medline">35077484</pub-id>
          <pub-id pub-id-type="pii">PONE-D-21-08851</pub-id>
          <pub-id pub-id-type="pmcid">PMC8789116</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref152">
        <label>152</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sloan</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Morgan</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Housley</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Williams</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Edwards</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Burnap</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Rana</surname>
              <given-names>O</given-names>
            </name>
          </person-group>
          <article-title>Knowing the tweeters: deriving sociologically relevant demographics from Twitter</article-title>
          <source>Sociological Res Online</source>
          <year>2013</year>
          <month>08</month>
          <day>31</day>
          <volume>18</volume>
          <issue>3</issue>
          <fpage>74</fpage>
          <lpage>84</lpage>
          <pub-id pub-id-type="doi">10.5153/sro.3001</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref153">
        <label>153</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sloan</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Who tweets in the United Kingdom? Profiling the Twitter population using the British social attitudes survey 2015</article-title>
          <source>Soc Media Soc</source>
          <year>2017</year>
          <month>03</month>
          <day>22</day>
          <volume>3</volume>
          <issue>1</issue>
          <fpage>2056305117698981</fpage>
          <pub-id pub-id-type="doi">10.1177/2056305117698981</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref154">
        <label>154</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jung</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>An</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Kwak</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Salminen</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Jansen</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Assessing the accuracy of four popular face recognition tools for inferring gender, age, and race</article-title>
          <source>Proc Int AAAI Conf Web Soc Media</source>
          <year>2018</year>
          <month>06</month>
          <day>15</day>
          <volume>12</volume>
          <issue>1</issue>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.aaai.org/ocs/index.php/ICWSM/ICWSM18/paper/view/17839"/>
          </comment>
          <pub-id pub-id-type="doi">10.1609/icwsm.v12i1.15058</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref155">
        <label>155</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Steyerberg</surname>
              <given-names>EW</given-names>
            </name>
            <name name-style="western">
              <surname>Harrell</surname>
              <given-names>FE</given-names>
            </name>
          </person-group>
          <article-title>Prediction models need appropriate internal, internal-external, and external validation</article-title>
          <source>J Clin Epidemiol</source>
          <year>2016</year>
          <month>01</month>
          <volume>69</volume>
          <fpage>245</fpage>
          <lpage>7</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/25981519"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jclinepi.2015.04.005</pub-id>
          <pub-id pub-id-type="medline">25981519</pub-id>
          <pub-id pub-id-type="pii">S0895-4356(15)00175-4</pub-id>
          <pub-id pub-id-type="pmcid">PMC5578404</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref156">
        <label>156</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Siontis</surname>
              <given-names>GC</given-names>
            </name>
            <name name-style="western">
              <surname>Ioannidis</surname>
              <given-names>JP</given-names>
            </name>
          </person-group>
          <article-title>Response to letter by Forike et al.: more rigorous, not less, external validation is needed</article-title>
          <source>J Clin Epidemiol</source>
          <year>2016</year>
          <month>01</month>
          <volume>69</volume>
          <fpage>250</fpage>
          <lpage>1</lpage>
          <pub-id pub-id-type="doi">10.1016/j.jclinepi.2015.01.021</pub-id>
          <pub-id pub-id-type="medline">25724895</pub-id>
          <pub-id pub-id-type="pii">S0895-4356(15)00053-0</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref157">
        <label>157</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Borkotoky</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Unisa</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Indicators to examine quality of large scale survey data: an example through district level household and facility survey</article-title>
          <source>PLoS One</source>
          <year>2014</year>
          <volume>9</volume>
          <issue>3</issue>
          <fpage>e90113</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pone.0090113"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0090113</pub-id>
          <pub-id pub-id-type="medline">24598760</pub-id>
          <pub-id pub-id-type="pii">PONE-D-13-34248</pub-id>
          <pub-id pub-id-type="pmcid">PMC3943897</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref158">
        <label>158</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Basannar</surname>
              <given-names>DR</given-names>
            </name>
            <name name-style="western">
              <surname>Singh</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Yadav</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Yadav</surname>
              <given-names>AK</given-names>
            </name>
          </person-group>
          <article-title>Quantifying age heaping and age misreporting in a multicentric survey</article-title>
          <source>Indian J Community Med</source>
          <year>2022</year>
          <volume>47</volume>
          <issue>1</issue>
          <fpage>104</fpage>
          <lpage>6</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/35368490"/>
          </comment>
          <pub-id pub-id-type="doi">10.4103/ijcm.ijcm_1179_21</pub-id>
          <pub-id pub-id-type="medline">35368490</pub-id>
          <pub-id pub-id-type="pii">IJCM-47-104</pub-id>
          <pub-id pub-id-type="pmcid">PMC8971874</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref159">
        <label>159</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gupta</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Gupta</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Dealing with noise problem in machine learning data-sets: a systematic review</article-title>
          <source>Procedia Comput Sci</source>
          <year>2019</year>
          <volume>161</volume>
          <fpage>466</fpage>
          <lpage>74</lpage>
          <pub-id pub-id-type="doi">10.1016/j.procs.2019.11.146</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref160">
        <label>160</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Karimi</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Dou</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Warfield</surname>
              <given-names>SK</given-names>
            </name>
            <name name-style="western">
              <surname>Gholipour</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Deep learning with noisy labels: exploring techniques and remedies in medical image analysis</article-title>
          <source>Med Image Anal</source>
          <year>2020</year>
          <month>10</month>
          <volume>65</volume>
          <fpage>101759</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/32623277"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.media.2020.101759</pub-id>
          <pub-id pub-id-type="medline">32623277</pub-id>
          <pub-id pub-id-type="pii">S1361-8415(20)30123-7</pub-id>
          <pub-id pub-id-type="pmcid">PMC7484266</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref161">
        <label>161</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Golder</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Scantlebury</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Christmas</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Understanding public attitudes toward researchers using social media for detecting and monitoring adverse events data: multi methods study</article-title>
          <source>J Med Internet Res</source>
          <year>2019</year>
          <month>08</month>
          <day>29</day>
          <volume>21</volume>
          <issue>8</issue>
          <fpage>e7081</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2019/8/e7081/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/jmir.7081</pub-id>
          <pub-id pub-id-type="medline">31469079</pub-id>
          <pub-id pub-id-type="pii">v21i8e7081</pub-id>
          <pub-id pub-id-type="pmcid">PMC6740159</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref162">
        <label>162</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Williams</surname>
              <given-names>ML</given-names>
            </name>
            <name name-style="western">
              <surname>Burnap</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Sloan</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Towards an ethical framework for publishing Twitter data in social research: taking into account users' views, online context and algorithmic estimation</article-title>
          <source>Sociology</source>
          <year>2017</year>
          <month>12</month>
          <day>26</day>
          <volume>51</volume>
          <issue>6</issue>
          <fpage>1149</fpage>
          <lpage>68</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://journals.sagepub.com/doi/abs/10.1177/0038038517708140?url_ver=Z39.88-2003&#38;rfr_id=ori:rid:crossref.org&#38;rfr_dat=cr_pub%20%200pubmed"/>
          </comment>
          <pub-id pub-id-type="doi">10.1177/0038038517708140</pub-id>
          <pub-id pub-id-type="medline">29276313</pub-id>
          <pub-id pub-id-type="pii">10.1177_0038038517708140</pub-id>
          <pub-id pub-id-type="pmcid">PMC5718335</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref163">
        <label>163</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Singh</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Polyzou</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Farr</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Gresenz</surname>
              <given-names>CR</given-names>
            </name>
          </person-group>
          <article-title>Social media data - our ethical conundrum</article-title>
          <source>Bulletin of the IEEE Computer Society Technical Committee on Data Engineering</source>
          <year>2020</year>
          <access-date>2023-03-30</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://sites.computer.org/debull/A20dec/p23.pdf">http://sites.computer.org/debull/A20dec/p23.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref164">
        <label>164</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Valdez</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Keim-Malpass</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <person-group person-group-type="editor">
            <name name-style="western">
              <surname>Bian</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Guo</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>He</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>X</given-names>
            </name>
          </person-group>
          <article-title>Ethics in health research using social media</article-title>
          <source>Social Web and Health Research</source>
          <year>2019</year>
          <publisher-loc>Cham, Switzerland</publisher-loc>
          <publisher-name>Springer</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref165">
        <label>165</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Benton</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Coppersmith</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Dredze</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Ethical research protocols for social media health research</article-title>
          <source>Proceedings of the First ACL Workshop on Ethics in Natural Language Processing</source>
          <conf-name>EthNLP@EACL</conf-name>
          <conf-date>April 4, 2017</conf-date>
          <conf-loc>Valencia, Spain</conf-loc>
          <pub-id pub-id-type="doi">10.18653/v1/w17-1612</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref166">
        <label>166</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>McKee</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Ethical issues in using social media for health and health care research</article-title>
          <source>Health Policy</source>
          <year>2013</year>
          <month>05</month>
          <volume>110</volume>
          <issue>2-3</issue>
          <fpage>298</fpage>
          <lpage>301</lpage>
          <pub-id pub-id-type="doi">10.1016/j.healthpol.2013.02.006</pub-id>
          <pub-id pub-id-type="medline">23477806</pub-id>
          <pub-id pub-id-type="pii">S0168-8510(13)00046-8</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
