<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMIR</journal-id>
      <journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id>
      <journal-title>Journal of Medical Internet Research</journal-title>
      <issn pub-type="epub">1438-8871</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v22i5e17224</article-id>
      <article-id pub-id-type="pmid">32469317</article-id>
      <article-id pub-id-type="doi">10.2196/17224</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Mental Health–Related Behaviors and Discussions Among Young Adults: Analysis and Classification</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Eysenbach</surname>
            <given-names>Gunther</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Wick</surname>
            <given-names>Madeline</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Kerr</surname>
            <given-names>Bradley</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Rivas</surname>
            <given-names>Ryan</given-names>
          </name>
          <degrees>BS</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Department of Computer Science and Engineering</institution>
            <institution>University of California, Riverside</institution>
            <addr-line>363 Winston Chung Hall</addr-line>
            <addr-line>900 University Ave</addr-line>
            <addr-line>Riverside, CA</addr-line>
            <country>United States</country>
            <phone>1 9518272838</phone>
            <email>rriva002@ucr.edu</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-5590-0274</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Shahbazi</surname>
            <given-names>Moloud</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-8031-0169</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Garett</surname>
            <given-names>Renee</given-names>
          </name>
          <degrees>MSW, LCSW</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-7170-3040</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Hristidis</surname>
            <given-names>Vagelis</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-8679-4988</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Young</surname>
            <given-names>Sean</given-names>
          </name>
          <degrees>MS, PhD</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-6052-4875</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Department of Computer Science and Engineering</institution>
        <institution>University of California, Riverside</institution>
        <addr-line>Riverside, CA</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>ElevateU</institution>
        <addr-line>Los Angeles, CA</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>University of California Institute for Prediction Technology</institution>
        <institution>University of California, Irvine</institution>
        <addr-line>Irvine, CA</addr-line>
        <country>United States</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Ryan Rivas <email>rriva002@ucr.edu</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>5</month>
        <year>2020</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>29</day>
        <month>5</month>
        <year>2020</year>
      </pub-date>
      <volume>22</volume>
      <issue>5</issue>
      <elocation-id>e17224</elocation-id>
      <history>
        <date date-type="received">
          <day>26</day>
          <month>11</month>
          <year>2019</year>
        </date>
        <date date-type="rev-request">
          <day>19</day>
          <month>1</month>
          <year>2020</year>
        </date>
        <date date-type="rev-recd">
          <day>1</day>
          <month>3</month>
          <year>2020</year>
        </date>
        <date date-type="accepted">
          <day>23</day>
          <month>3</month>
          <year>2020</year>
        </date>
      </history>
      <copyright-statement>©Ryan Rivas, Moloud Shahbazi, Renee Garett, Vagelis Hristidis, Sean Young. Originally published in the Journal of Medical Internet Research (http://www.jmir.org), 29.05.2020.</copyright-statement>
      <copyright-year>2020</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research, is properly cited. The complete bibliographic information, a link to the original publication on http://www.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="http://www.jmir.org/2020/5/e17224/" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>There have been recurring reports of web-based harassment and abuse among adolescents and young adults through anonymous social networks.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>This study aimed to explore discussions on the popular anonymous social network Yik Yak related to social and mental health messaging behaviors among college students, including cyberbullying, to provide insights into mental health behaviors on college campuses.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>From April 6, 2016, to May 7, 2016, we collected anonymous conversations posted on Yik Yak at 19 universities in 4 different states and performed statistical analyses and text classification experiments on a subset of these messages.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>We found that prosocial messages were 5.23 times more prevalent than bullying messages. The frequency of cyberbullying messages was positively associated with messages seeking emotional help. We found significant geographic variation in the frequency of messages offering support vs bullying messages. Across campuses, bullying and political discussions were positively associated. We also achieved a balanced accuracy of over 0.75 for most messaging behaviors and topics with a support vector machine classifier.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>Our results show that messages containing data about students’ mental health–related attitudes and behaviors are prevalent on anonymous social networks, suggesting that these data can be mined for real-time analysis. This information can be used in education and health care services to better engage with students, provide insight into conversations that lead to cyberbullying, and reach out to students who need support.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>social media</kwd>
        <kwd>data analysis</kwd>
        <kwd>supervised machine learning</kwd>
        <kwd>universities</kwd>
        <kwd>students</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <sec>
        <title>Background</title>
        <p>The transition from high school to college marks the beginning of an important period of psychosocial development. The academic and social demands of college life are often rigorous and can pose a risk to undergraduate students’ health and well-being [<xref ref-type="bibr" rid="ref1">1</xref>]. One example of the challenges they face is poor sleep [<xref ref-type="bibr" rid="ref2">2</xref>], which has been linked to a number of adverse consequences, including higher rates of depressive symptoms and stress [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref4">4</xref>], weight gain [<xref ref-type="bibr" rid="ref5">5</xref>], and poor academic performance [<xref ref-type="bibr" rid="ref6">6</xref>]. Another concern for undergraduate students that has arisen in recent years is their social media use, as studies show a link between cyberbullying and major health problems such as substance use, depression, poor sleep, and suicide [<xref ref-type="bibr" rid="ref7">7</xref>-<xref ref-type="bibr" rid="ref9">9</xref>]. Given the array of health risks faced by undergraduate students, it is important to be aware of students’ health and risk-related behaviors to be able to provide adequate services and support, such as from psychological and medical campus services.</p>
        <p>Traditionally, methods for monitoring the health of a population, for example, students on a college campus, have focused on case reports and surveys [<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref11">11</xref>]. Although these methods can offer insights into health-related attitudes and behaviors, they can be time- and cost-intensive to implement. However, researchers using social media data can collect and analyze behavior data in real time [<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref11">11</xref>], allowing health authorities to address student needs in a flexible and timely manner.</p>
        <p>To explore the feasibility of using social media platforms to identify and predict health-related events, Young et al [<xref ref-type="bibr" rid="ref12">12</xref>] screened geolocated Twitter messages for keywords that suggested HIV risk behaviors. The authors used negative binomial regression analyses to determine the association between tweets about HIV risk behaviors and county-level HIV data in the United States. The results showed a strong association between tweets about HIV risk behaviors and actual county HIV data. Additionally, De Choudhury et al [<xref ref-type="bibr" rid="ref13">13</xref>] successfully used tweets to predict the onset of major depressive disorder with 70% accuracy. They selected tweets based on indicators such as linguistic style, use of terms associated with depression, and social network characteristics.</p>
        <p>Yik Yak was an anonymous web-based bulletin board for users within the same geographic area (eg, college campuses) that debuted in 2013 [<xref ref-type="bibr" rid="ref14">14</xref>]. At the time of this study, it was a popular social network for college students but faced substantial criticism. Critics argued, aided by anecdotal evidence relayed through media reports, that anonymous posting encourages harassment and bullying [<xref ref-type="bibr" rid="ref14">14</xref>-<xref ref-type="bibr" rid="ref17">17</xref>]. In a recent content analysis of Yik Yak conversations [<xref ref-type="bibr" rid="ref18">18</xref>], there was no evidence of a pervasive culture of harassment and abuse. However, the researchers in that same analysis did observe derogatory and incendiary comments, arguably racist and sexist messages, and several likely instances of bullying [<xref ref-type="bibr" rid="ref18">18</xref>]. Furthermore, other research has shown that harassment is prevalent among users of Yik Yak and other anonymous social networks in Bangladesh [<xref ref-type="bibr" rid="ref19">19</xref>]. Although Yik Yak is now defunct, the rising popularity of anonymous social networks [<xref ref-type="bibr" rid="ref20">20</xref>] suggests that its data can still provide useful insights.</p>
      </sec>
      <sec>
        <title>Study Overview</title>
        <p>In this study, we explored 2 types of messages students made on Yik Yak. The first type consists of posts exhibiting messaging behaviors that can have an impact on students’ health in relation to cyberbullying. This includes cyberbullying itself, which has previously been linked to health problems [<xref ref-type="bibr" rid="ref7">7</xref>-<xref ref-type="bibr" rid="ref9">9</xref>]. It also includes prosocial messages, which are messages sent by a user with the intention of benefiting one or more other users [<xref ref-type="bibr" rid="ref21">21</xref>], or with the intention of seeking such messages. The prosocial messaging behaviors we selected are related to bullying and its effects on health. Two of these are seeking and offering support, as students with high depression or anxiety often turn to social media for social support [<xref ref-type="bibr" rid="ref22">22</xref>]. The second type consists of messages that discuss one of 4 topics frequently discussed by students on Yik Yak, such as relationships and living on campus, to provide additional context to the messaging behaviors we analyzed in this study. We analyzed these messaging behaviors and topics by determining which ones are most frequently discussed and which are the most popular (in terms of votes) and by finding correlations between different messaging behaviors and topics.</p>
        <p>Our goal is to provide insights for school administrators, public health researchers, and health care professionals regarding the prevalence of messaging behaviors, such as bullying and social support, and knowledge of general topics discussed in the network. Specifically, the purpose of this study is to show that messaging behaviors that can have an impact on students’ health occur frequently on anonymous social networks, demonstrate how they are regarded by other students by analyzing their popularity, describe the prevalence and popularity of topics that are commonly discussed by college students, and explore the intercorrelations between these messaging behaviors and topics. Knowledge of these activities on anonymous social networks can inform interventions that promote healthy and prosocial behaviors among adolescents and young adults.</p>
        <p>We also investigated the feasibility of automatic classification of messaging behaviors and topics in this study. This involved training 3 machine learning algorithms with several combinations of hyperparameters to determine the best combination for each messaging behavior and topic. We report the results of these models on test data to demonstrate their effectiveness. An accurate classification model can complement the insights provided by this study by providing administrators, researchers, and health care professionals with a tool to more easily find relevant messages.</p>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Data</title>
        <p>From April 6, 2016, to May 7, 2016, we collected anonymous conversations posted on the Yik Yak social network at 5 randomly selected universities located in each of the 4 most populous US states: California (CA); Florida (FL); New York (NY); and Texas (TX). To protect our analyses from the influence of a university with an exceptionally large number of messages, we calculated the number of messages from each university per capita with respect to the number of students enrolled at that university. We then flagged universities that had a number of messages per enrolled student more than 1.5 SDs above their state’s mean. This resulted in the removal of 1 university, the University of Texas at Dallas, leaving a total of 19 universities. <xref ref-type="table" rid="table1">Table 1</xref> lists these universities, their status as either a public or a private university, their enrollment, and their ranking according to the 2017 <italic>Wall Street Journal</italic>/Times Higher Education College Rankings [<xref ref-type="bibr" rid="ref23">23</xref>]. Enrollment and rankings are used as part of our analysis of the interplay between variables. For our analysis, we randomly selected 100 conversation threads from each of the universities (N=16,966 messages), with a mean of 892.95 (SD 128) messages per university. We analyzed the messages with respect to the type of messaging behavior, content, and popularity of message type and content.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Characteristics of universities included in the study.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="470"/>
            <col width="0"/>
            <col width="180"/>
            <col width="0"/>
            <col width="170"/>
            <col width="0"/>
            <col width="150"/>
            <thead>
              <tr valign="top">
                <td colspan="2">State and university</td>
                <td colspan="2">Public or private</td>
                <td colspan="2">Enrollment</td>
                <td colspan="2">Ranking</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="8">
                  <bold>CA<sup>a</sup></bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">California Polytechnic State University</td>
                <td colspan="2">Public</td>
                <td colspan="2">19,226</td>
                <td>221</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">CSU<sup>b</sup> Chico</td>
                <td colspan="2">Public</td>
                <td colspan="2">16,535</td>
                <td>467</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">CSU Los Angeles</td>
                <td colspan="2">Public</td>
                <td colspan="2">20,353</td>
                <td>700</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">CSU San Bernardino</td>
                <td colspan="2">Public</td>
                <td colspan="2">17,167</td>
                <td>700</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">University of California, Irvine</td>
                <td colspan="2">Public</td>
                <td colspan="2">25,001</td>
                <td>153</td>
              </tr>
              <tr valign="top">
                <td colspan="8">
                  <bold>FL<sup>c</sup></bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">Florida International University</td>
                <td colspan="2">Public</td>
                <td colspan="2">53,525</td>
                <td>550</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">Florida State University</td>
                <td colspan="2">Public</td>
                <td colspan="2">36,575</td>
                <td>226</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">University of Central Florida</td>
                <td colspan="2">Public</td>
                <td colspan="2">59,894</td>
                <td>445</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">University of Florida</td>
                <td colspan="2">Public</td>
                <td colspan="2">36,731</td>
                <td>56</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">University of South Florida</td>
                <td colspan="2">Public</td>
                <td colspan="2">35,035</td>
                <td>396</td>
              </tr>
              <tr valign="top">
                <td colspan="8">
                  <bold>NY<sup>d</sup></bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">Cornell University</td>
                <td colspan="2">Private</td>
                <td colspan="2">14,706</td>
                <td>9</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">CUNY<sup>e</sup> Hunter College</td>
                <td colspan="2">Public</td>
                <td colspan="2">20,582</td>
                <td>350</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">CUNY John Jay College of Criminal Justice</td>
                <td colspan="2">Public</td>
                <td colspan="2">15,845</td>
                <td>700</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">SUNY<sup>f</sup> Buffalo State</td>
                <td colspan="2">Public</td>
                <td colspan="2">10,665</td>
                <td>700</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">SUNY New Paltz</td>
                <td colspan="2">Public</td>
                <td colspan="2">7756</td>
                <td>423</td>
              </tr>
              <tr valign="top">
                <td colspan="8">
                  <bold>TX<sup>g</sup></bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">Tarleton State University</td>
                <td colspan="2">Public</td>
                <td colspan="2">11,008</td>
                <td>800</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">Texas Tech University</td>
                <td colspan="2">Public</td>
                <td colspan="2">29,342</td>
                <td>550</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">University of Houston</td>
                <td colspan="2">Public</td>
                <td colspan="2">36,128</td>
                <td>388</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">University of Texas, Rio Grande Valley</td>
                <td colspan="2">Public</td>
                <td colspan="2">27,560<sup>h</sup></td>
                <td>N/A<sup>i</sup></td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>CA: California.</p>
            </fn>
            <fn id="table1fn2">
              <p><sup>b</sup>CSU: California State University.</p>
            </fn>
            <fn id="table1fn3">
              <p><sup>c</sup>FL: Florida.</p>
            </fn>
            <fn id="table1fn4">
              <p><sup>d</sup>NY: New York.</p>
            </fn>
            <fn id="table1fn5">
              <p><sup>e</sup>CUNY: City University of New York.</p>
            </fn>
            <fn id="table1fn6">
              <p><sup>f</sup>SUNY: State University of New York.</p>
            </fn>
            <fn id="table1fn7">
              <p><sup>g</sup>TX: Texas.</p>
            </fn>
            <fn id="table1fn8">
              <p><sup>h</sup>Fall 2016 enrollment for the University of Texas Rio Grande Valley [<xref ref-type="bibr" rid="ref24">24</xref>].</p>
            </fn>
            <fn id="table1fn9">
              <p><sup>i</sup>N/A: not applicable.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Messaging Behaviors</title>
        <p>Within the context of this study, we use the term <italic>messaging behavior</italic> to refer to the intent of a message, that is, what a user is trying to accomplish by posting a message. For each message, we determined if it displayed 1 of the 4 predefined messaging behaviors listed in <xref ref-type="table" rid="table2">Table 2</xref>. Among these is bullying, which we included in our analysis because of its effects on student health [<xref ref-type="bibr" rid="ref7">7</xref>-<xref ref-type="bibr" rid="ref9">9</xref>]. A message was considered to be bullying if it intended harm (ie, if the purpose of the message appeared to be to negatively impact the recipient’s mental health), was indicative of a power imbalance (eg, the message was racist or sexist), and if the sender repeatedly sent these messages [<xref ref-type="bibr" rid="ref25">25</xref>]. We also included seeking help and offering support because of their relation to health and bullying—supportive environments can be seen as more healthy and possibly more likely to prevent or reduce bullying. Humor was included to better understand if users were intentionally bullying or trying to be humorous. A total of 2 undergraduate raters independently coded the selected messages for these 4 messaging behaviors; each message was assigned a messaging behavior only if both raters coded it as such.</p>
        <p><xref ref-type="table" rid="table3">Table 3</xref> lists the range, mean, SD, and median for several characteristics of messages with the messaging behaviors defined in <xref ref-type="table" rid="table2">Table 2</xref>: message length, measured in both characters and words; the number of replies received by any message; the number of replies received by initial posts (ie, the first message in a thread); the post time for messages posted between midnight and noon (AM); and the post time for messages posted between noon and midnight (PM).</p>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Definitions of messaging behaviors included in the study.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="160"/>
            <col width="270"/>
            <col width="410"/>
            <col width="160"/>
            <thead>
              <tr valign="top">
                <td>Messaging behavior</td>
                <td>Definition</td>
                <td>Examples</td>
                <td>Cohen kappa (number of agreements)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Seeking help</td>
                <td>Seeking social support (eg, emotional support and help with problems) from other users</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>“I like don't know what to do with myself. Literally I have no one to talk to”</p>
                    </list-item>
                    <list-item>
                      <p>“What's the easiest class to fill art requirement? I'm terrible at art”</p>
                    </list-item>
                  </list>
                </td>
                <td>0.48 (90)</td>
              </tr>
              <tr valign="top">
                <td>Offering support</td>
                <td>Giving social support to other users</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>“Hope everything gets resolved OP!”</p>
                    </list-item>
                    <list-item>
                      <p>“You've got this!”</p>
                    </list-item>
                  </list>
                </td>
                <td>0.56 (86)</td>
              </tr>
              <tr valign="top">
                <td>Bullying</td>
                <td>Intends harm, indicative of a power imbalance, and messages are repeatedly sent [<xref ref-type="bibr" rid="ref25">25</xref>]</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>“You people are disgusting”</p>
                    </list-item>
                    <list-item>
                      <p>“In the words of DJ Khaled ‘congratulations you played yourself’ it's not hard to portray being a moron. It's quite sad actually”</p>
                    </list-item>
                  </list>
                </td>
                <td>0.00 (95)</td>
              </tr>
              <tr valign="top">
                <td>Humor</td>
                <td>Intends to be funny without bullying</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>“I predict my day based on my morning poo”</p>
                    </list-item>
                    <list-item>
                      <p>“Why get thinner when you can get more dinner?”</p>
                    </list-item>
                  </list>
                </td>
                <td>0.48 (87)</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Characteristics of messages with each messaging behavior.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="30"/>
            <col width="420"/>
            <col width="200"/>
            <col width="170"/>
            <col width="150"/>
            <thead>
              <tr valign="top">
                <td colspan="3">Characteristic</td>
                <td>Range</td>
                <td>Mean (SD)</td>
                <td>Median</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="6">
                  <bold>Seeking help</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="5">
                  <bold>Message length</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>Characters</td>
                <td>11-204</td>
                <td>74.10 (47.82)</td>
                <td>61</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>Words</td>
                <td>2-42</td>
                <td>14.61 (9.60)</td>
                <td>12</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="5">
                  <bold>Number of replies</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>All posts</td>
                <td>0-50</td>
                <td>4.14 (7.03)</td>
                <td>2</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>Initial post</td>
                <td>0-50</td>
                <td>5.47 (7.61)</td>
                <td>3</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="5">
                  <bold>Post time</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>AM</td>
                <td>12:01 AM-11:48 AM</td>
                <td>3:38 (2:56)</td>
                <td>2:43</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>PM</td>
                <td>12:06 PM-11:57 PM</td>
                <td>7:33 (3:13)</td>
                <td>8:06</td>
              </tr>
              <tr valign="top">
                <td colspan="6">
                  <bold>Offering support</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="5">
                  <bold>Message length</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td/>
                <td>Characters</td>
                <td>2-200</td>
                <td>74.87 (58.39)</td>
                <td>58</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>Words</td>
                <td>1-43</td>
                <td>14.39 (11.25)</td>
                <td>11</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="5">
                  <bold>Number of replies</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td/>
                <td>All posts</td>
                <td>0-17</td>
                <td>0.04 (0.66)</td>
                <td>0</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>Initial post</td>
                <td>0-17</td>
                <td>4.57 (6.50)</td>
                <td>1</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="5">
                  <bold>Post time</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>AM</td>
                <td>Midnight-11:57 AM</td>
                <td>3:27 (2:43)</td>
                <td>2:47</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>PM</td>
                <td>Noon-11:59 PM</td>
                <td>7:44 (3:01)</td>
                <td>8:25</td>
              </tr>
              <tr valign="top">
                <td colspan="6">
                  <bold>Bullying</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="5">
                  <bold>Message length</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>Characters</td>
                <td>3-230</td>
                <td>63.26 (49.64)</td>
                <td>47</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>Words</td>
                <td>1-40</td>
                <td>11.92 (9.32)</td>
                <td>9</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="5">
                  <bold>Number of replies</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>All posts</td>
                <td>0-44</td>
                <td>0.17 (2.42)</td>
                <td>0</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>Initial post</td>
                <td>0-44</td>
                <td>4.07 (11.53)</td>
                <td>1</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="5">
                  <bold>Post time</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>AM</td>
                <td>Midnight-11:58 AM</td>
                <td>3:37 (2:32)</td>
                <td>3:12</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>PM</td>
                <td>12:10 PM-11:58 PM</td>
                <td>8:38 (3:00)</td>
                <td>9:43</td>
              </tr>
              <tr valign="top">
                <td colspan="6">
                  <bold>Humor</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="5">
                  <bold>Message length</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>Characters</td>
                <td>2-199</td>
                <td>32.37 (43.96)</td>
                <td>36</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>Words</td>
                <td>1-41</td>
                <td>6.37 (8.43)</td>
                <td>7</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="5">
                  <bold>Number of replies</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>All posts</td>
                <td>0-9</td>
                <td>0.28 (1.02)</td>
                <td>0</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>Initial post</td>
                <td>0-9</td>
                <td>1.83 (2.02)</td>
                <td>1</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="5">
                  <bold>Post time</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>AM</td>
                <td>12:02 AM-11:58 AM</td>
                <td>3:21 (2:49)</td>
                <td>2:40</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>PM</td>
                <td>12:09 PM-11:59 PM</td>
                <td>7:17 (3:25)</td>
                <td>8:09</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
      <sec>
        <title>Message Topics</title>
        <p>We applied latent Dirichlet allocation (LDA) to the message corpus to identify themes within the message content. LDA is a common method for categorizing topics and themes [<xref ref-type="bibr" rid="ref26">26</xref>]. It models each message as a probabilistic mixture of topics. Each topic, in turn, is probabilistically associated with various words. As topics are defined purely in statistical terms, the user chooses the semantic interpretation (ie, the label) of each topic based on the word probabilities for that topic.</p>
        <p>Next, we sought to identify topics in which the LDA message classifications aligned most closely with human judgment. We did this with a subset of 1200 randomly selected messages to which the LDA assigned a topic with a probability greater than 0.7. For each of these messages, a team of 3 raters decided if the LDA topic assignment was correct (ie, does the message discuss topic <italic>X</italic>). On the basis of these results, we selected the 4 topics with the highest classification accuracy: relationships and sex, college living, politics, and school and classes.</p>
        <p>In the final step, 2 undergraduate raters independently applied the 4-topic classification scheme to 96 randomly selected messages. We found that their interrater agreement was high (Cohen kappa=0.78), so all remaining messages were coded by 1 of the 2 raters. <xref ref-type="table" rid="table4">Table 4</xref> lists Cohen kappa for each individual topic; it is undefined for politics because neither rater coded any of the 96 messages for that topic.</p>
        <p><xref ref-type="table" rid="table5">Table 5</xref> lists the range, mean, SD, and median for several characteristics of messages with these topics.</p>
        <table-wrap position="float" id="table4">
          <label>Table 4</label>
          <caption>
            <p>Cohen kappa for each topic (n=96).</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="310"/>
            <col width="210"/>
            <col width="160"/>
            <col width="140"/>
            <col width="180"/>
            <thead>
              <tr valign="top">
                <td>Statistic</td>
                <td>Relationships and sex</td>
                <td>College living</td>
                <td>Politics</td>
                <td>School and classes</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Cohen kappa</td>
                <td>0.73</td>
                <td>1.00</td>
                <td>Undefined</td>
                <td>0.77</td>
              </tr>
              <tr valign="top">
                <td>Number of agreements</td>
                <td>90</td>
                <td>96</td>
                <td>96</td>
                <td>91</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <table-wrap position="float" id="table5">
          <label>Table 5</label>
          <caption>
            <p>Characteristics of messages with each topic.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="30"/>
            <col width="420"/>
            <col width="200"/>
            <col width="170"/>
            <col width="150"/>
            <thead>
              <tr valign="top">
                <td colspan="3">Characteristic</td>
                <td>Range</td>
                <td>Mean (SD)</td>
                <td>Median</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="6">
                  <bold>Relationships and sex</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="5">
                  <bold>Message length</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>Characters</td>
                <td>2-252</td>
                <td>82.18 (52.32)</td>
                <td>70</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>Words</td>
                <td>1-47</td>
                <td>16.17 (10.32)</td>
                <td>14</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="5">
                  <bold>Number of replies</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td/>
                <td>All posts</td>
                <td>0-50</td>
                <td>0.96 (3.43)</td>
                <td>0</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>Initial post</td>
                <td>0-50</td>
                <td>4.60 (6.31)</td>
                <td>3</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="5">
                  <bold>Post time</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>AM</td>
                <td>Midnight-11:58 AM</td>
                <td>3:27 (2:21)</td>
                <td>3:07</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>PM</td>
                <td>Noon-11:59 PM</td>
                <td>8:05 (3:16)</td>
                <td>8:55</td>
              </tr>
              <tr valign="top">
                <td colspan="6">
                  <bold>College living</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="5">
                  <bold>Message length</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>Characters</td>
                <td>3-200</td>
                <td>74.56 (49.98)</td>
                <td>62</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>Words</td>
                <td>1-42</td>
                <td>14.36 (9.52)</td>
                <td>12</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="5">
                  <bold>Number of replies</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td/>
                <td>All posts</td>
                <td>0-19</td>
                <td>0.83 (2.15)</td>
                <td>0</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>Initial post</td>
                <td>0-19</td>
                <td>2.60 (3.14)</td>
                <td>2</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="5">
                  <bold>Post time</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>AM</td>
                <td>Midnight-11:56 AM</td>
                <td>3:34 (2:38)</td>
                <td>2:57</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>PM</td>
                <td>Noon-11:59 PM</td>
                <td>6:57 (3:15)</td>
                <td>7:24</td>
              </tr>
              <tr valign="top">
                <td colspan="6">
                  <bold>Politics</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="5">
                  <bold>Message length</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>Characters</td>
                <td>5-210</td>
                <td>107.72 (58.43)</td>
                <td>99</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>Words</td>
                <td>1-43</td>
                <td>19.22 (10.65)</td>
                <td>17</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="5">
                  <bold>Number of replies</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td/>
                <td>All posts</td>
                <td>0-53</td>
                <td>0.83 (4.27)</td>
                <td>0</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>Initial post</td>
                <td>0-53</td>
                <td>7.13 (10.59)</td>
                <td>4</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="5">
                  <bold>Post time</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>AM</td>
                <td>Midnight-11:47 AM</td>
                <td>3:26 (2:32)</td>
                <td>3:06</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>PM</td>
                <td>12:08 PM-11:58 PM</td>
                <td>7:52 (3:11)</td>
                <td>7:30</td>
              </tr>
              <tr valign="top">
                <td colspan="6">
                  <bold>School and classes</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="5">
                  <bold>Message length</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>Characters</td>
                <td>3-202</td>
                <td>71.41 (49.59)</td>
                <td>59</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>Words</td>
                <td>1-42</td>
                <td>13.67 (9.38)</td>
                <td>11</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="5">
                  <bold>Number of replies</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>All posts</td>
                <td>0-44</td>
                <td>0.98 (3.33)</td>
                <td>0</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>Initial post</td>
                <td>0-44</td>
                <td>4.39 (5.90)</td>
                <td>3</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="5">
                  <bold>Post time</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>AM</td>
                <td>Midnight-11:58 AM</td>
                <td>3:41 (2:58)</td>
                <td>2:46</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>PM</td>
                <td>12:03 PM-11:59 PM</td>
                <td>6:58 (3:09)</td>
                <td>7:35</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
      <sec>
        <title>Analysis</title>
        <p>Our analysis consisted of 3 parts: frequency of messaging behaviors and topics, popularity of messaging behaviors and topics, and interplay between variables. In the first 2 parts, we used messages that raters uniquely assigned to 1 or none of the 4 predefined messaging behaviors to assess the frequency and popularity of messaging behaviors. Similarly, we used messages that raters uniquely assigned to 1 or none of the 4 LDA-derived topics to assess the frequency and popularity of topics. In all statistical analyses, the significance criterion was alpha=.05.</p>
        <p>In our analysis of the relative frequencies of messaging behaviors and topics on Yik Yak, Bonferroni-corrected Fisher exact tests determined if differences in the frequencies of these messaging behaviors or topics across states were statistically significant. If we found that the differences for a messaging behavior or topic were significant, we followed this up with Bonferroni-corrected Fisher exact tests for pairwise comparisons between states of the frequency of that messaging behavior or topic.</p>
        <p>We determined the popularity of a message by the aggregate score of +1 votes (upvotes) and −1 votes (downvotes) assigned by Yik Yak users before data collection. Notably, if a message on Yik Yak reaches a sum score of −5, it is automatically deleted from the social network. Thus, the lowest possible popularity score for a message in our dataset was −4. To protect our analyses from the influence of a few massively popular messages, we flagged messages with a score greater than 2.5 SDs above the grand mean. We then submitted the individual message scores to state × messaging behavior and state × topic analysis of variance (ANOVA), followed up by Tukey range test to further investigate any significant main effects of each ANOVA.</p>
        <p>The third part of our analysis examined the relationship between the frequency of prosocial messages in which users sought help or offered support, the frequency of bullying messages, the popularity of these messaging behaviors, and the frequency of topics. We carried out an analysis at the university level. For each university, we calculated mean messaging behavior frequencies, the corresponding mean popularity scores, and mean topic frequencies. We measured correlations between these variables together with 2 additional variables—the number of students enrolled and school ranking.</p>
      </sec>
      <sec>
        <title>Classification</title>
        <p>We conducted a series of experiments with 3 text classification algorithms on the messaging behaviors and topics in this study. The first 2 are random forest [<xref ref-type="bibr" rid="ref27">27</xref>] and linear support vector machine (SVM) [<xref ref-type="bibr" rid="ref28">28</xref>] classifiers with term frequency-inverse document frequency (TF-IDF) vectors [<xref ref-type="bibr" rid="ref29">29</xref>], and the third is a convolutional neural network (CNN) text classifier [<xref ref-type="bibr" rid="ref30">30</xref>] with global vectors for word representation (GloVe) [<xref ref-type="bibr" rid="ref31">31</xref>].</p>
        <p>In each experiment, we selected 1 messaging behavior or topic and regarded each message in the dataset as a tuple (<italic>t</italic>, <italic>c</italic>), where <italic>t</italic> is the message text concatenated with tokens for the university and state the message is from, and <italic>c</italic> is a class label <italic>positive</italic> (the selected messaging behavior or topic is present in the message) or <italic>negative</italic> (the messaging behavior or topic is not present). We randomly selected 10.00% (1697/16,966) of the dataset to be used as the test dataset. With the remaining training dataset, we used 5-fold cross-validation and measured the balanced accuracy [<xref ref-type="bibr" rid="ref32">32</xref>] of each classifier to determine the best combination of classifier hyperparameters, which are then used with the full training dataset to build the final classifier model.</p>
        <p><xref ref-type="table" rid="table6">Table 6</xref> lists the hyperparameters and their respective values evaluated by our experiments for each classifier. For all classifiers, we preprocess the data by removing stop words and lemmatizing the remaining words with the natural language toolkit [<xref ref-type="bibr" rid="ref33">33</xref>]. For the random forest and SVM classifiers, we add <italic>balanced</italic> class weights as defined by Scikit-learn [<xref ref-type="bibr" rid="ref34">34</xref>]. The TF-IDF vectors are also built from the implementation in Scikit-learn [<xref ref-type="bibr" rid="ref34">34</xref>]. The remaining hyperparameters are set to their default values, as defined by the implementations of these classifiers in Scikit-learn [<xref ref-type="bibr" rid="ref34">34</xref>]. For the CNN classifier, we perform upsampling such that the positive messages in the training data are as frequent as the negative messages and use 100-dimension GloVe vectors pretrained on Twitter data. All other CNN hyperparameters are set to their default values as defined in the code by Ng [<xref ref-type="bibr" rid="ref35">35</xref>].</p>
        <table-wrap position="float" id="table6">
          <label>Table 6</label>
          <caption>
            <p>Classifier hyperparameter values evaluated in our experiments.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="470"/>
            <col width="500"/>
            <thead>
              <tr valign="top">
                <td colspan="2">Classifier and hyperparameter</td>
                <td>Values</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="3">
                  <bold>Random forest</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Maximum tree depth</td>
                <td>2, 4, 8, 16, 32, 64</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Number of trees</td>
                <td>10, 100, 1000</td>
              </tr>
              <tr valign="top">
                <td colspan="3">
                  <bold>SVM<sup>a</sup></bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <italic>C</italic>
                  <sup>b</sup>
                </td>
                <td>0.001, 0.01, 0.1, 1, 10</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Loss function</td>
                <td>Hinge, squared hinge</td>
              </tr>
              <tr valign="top">
                <td colspan="3">
                  <bold>CNN<sup>c</sup></bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Filter window sizes</td>
                <td>(2, 3, 4), (3, 4, 5), (4, 5, 6)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Feature maps per filter window size</td>
                <td>100, 200, 300, 400, 500, 600</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table6fn1">
              <p><sup>a</sup>SVM: support vector machine.</p>
            </fn>
            <fn id="table6fn2">
              <p><sup>b</sup><italic>C</italic>: SVM regularization parameter.</p>
            </fn>
            <fn id="table6fn3">
              <p><sup>c</sup>CNN: convolutional neural network.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Frequency of Messaging Behaviors</title>
        <p>A total of 11.91% (2021/16,966) of the messages were focused on 1 of the 4 predefined messaging behavior categories: seeking help, offering support, humor, and bullying. <xref ref-type="table" rid="table7">Table 7</xref> lists the frequencies of these messaging behaviors by state. We found significant differences in the relative frequency of messages offering support (<italic>P</italic>&#60;.001) and bullying messages (<italic>P</italic>&#60;.001). We found no significant geographic differences for messages seeking help (<italic>P</italic>=.20) or for humorous messages (<italic>P</italic>=.40). Using separate Fisher exact tests, we found that the 2 states with the lowest rates of bullying, CA and FL, differed significantly from the states with the highest rates, NY and TX (<italic>P</italic>&#60;.001 for CA vs TX and FL vs TX, <italic>P</italic>=.001 for CA vs NY, <italic>P</italic>=.003 for FL vs NY).</p>
        <table-wrap position="float" id="table7">
          <label>Table 7</label>
          <caption>
            <p>Frequency of messaging behaviors by state.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="220"/>
            <col width="120"/>
            <col width="120"/>
            <col width="120"/>
            <col width="120"/>
            <col width="140"/>
            <col width="160"/>
            <thead>
              <tr valign="top">
                <td>Messaging behavior</td>
                <td>CA<sup>a</sup> (N=4496), n (%)</td>
                <td>FL<sup>b</sup> (N=4694), n (%)</td>
                <td>NY<sup>c</sup> (N=4273), n (%)</td>
                <td>TX<sup>d</sup> (N=3503), n (%)</td>
                <td>Total (N=16,966), n (%)</td>
                <td>Bonferroni-corrected Fisher exact <italic>P</italic> value</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Seeking help</td>
                <td>70 (1.56)</td>
                <td>94 (2.00)</td>
                <td>65 (1.52)</td>
                <td>70 (2.00)</td>
                <td>299 (1.76)</td>
                <td>.20</td>
              </tr>
              <tr valign="top">
                <td>Offering support</td>
                <td>183 (4.07)</td>
                <td>381 (8.12)</td>
                <td>234 (5.48)</td>
                <td>88 (2.51)</td>
                <td>886 (5.22)</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td>Bullying</td>
                <td>61 (1.36)</td>
                <td>68 (1.45)</td>
                <td>98 (2.29)</td>
                <td>93 (2.65)</td>
                <td>320 (1.89)</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td>Humor</td>
                <td>140 (3.11)</td>
                <td>134 (2.85)</td>
                <td>144 (3.37)</td>
                <td>98 (2.80)</td>
                <td>516 (3.04)</td>
                <td>.40</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table7fn1">
              <p><sup>a</sup>CA: California.</p>
            </fn>
            <fn id="table7fn2">
              <p><sup>b</sup>FL: Florida.</p>
            </fn>
            <fn id="table7fn3">
              <p><sup>c</sup>NY: New York.</p>
            </fn>
            <fn id="table7fn4">
              <p><sup>d</sup>TX: Texas.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <p>We also evaluated a sample of messages that were not assigned any of the 4 predefined messaging behavior categories to better understand the nature of messaging behavior outside of these categories. This sample consisted of 100 messages that were the first messages in their respective conversation threads. We found that the majority of these messages (68/100) were commentary, for example, anticipation of future events (“Cant wait for summer!!! #summer16”), reactions to personal experiences (“I hate when people tell me to put on headphones.”), and observations (“So many economics majors on yikyak nowadays”). Other messages (16/100) asked questions that did not seek social support, for example, soliciting opinions (“Do you think all pedophiles should be executed or do you think they deserve a 2nd chance and then should be executed if they relapse?”) and polling (“Quick poll. What's your ethnicity?”). Further messages (12/100) sought people to meet with or talk to for purposes other than social support, for example, for dating (“Any cute girls in the dorms? Drop your snapchat names”) or classes (“Anyone in geology 210 on M for 4:00-5:50?”).</p>
        <p>The remaining messages in the sample (4/100) lacked sufficient context to judge their messaging behavior. Although these broadly defined messaging behaviors are not directly related to this study and, thus, not subjected to further analysis, this sample of posts shows that future work focusing on the commentary present on an anonymous social network would likely have substantial coverage of the message content of that network.</p>
      </sec>
      <sec>
        <title>Frequency of Topics</title>
        <p>Using only messages with 1 or none of the 4 LDA-derived topics (relationships and sex, college living, politics, and school and classes), we excluded 0.69% (117/16,966) of the messages from the frequency analysis. A total of 26.33% (4437/16,849) of the remaining messages dealt with either relationships and sex (2516/16,849, 14.93%), college living (644/16,849, 3.82%), politics (607/16,849, 3.60%), or school and classes (670/16,849, 3.98%). In <xref ref-type="table" rid="table8">Table 8</xref>, we break these numbers down further by state. Using separate Fisher exact tests, we found significant regional differences for each topic. NY had the fewest relationship messages and differed significantly from CA (<italic>P</italic>&#60;.001) and TX (<italic>P</italic>=.048).</p>
        <table-wrap position="float" id="table8">
          <label>Table 8</label>
          <caption>
            <p>Frequency of topics by state.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="220"/>
            <col width="120"/>
            <col width="120"/>
            <col width="120"/>
            <col width="120"/>
            <col width="140"/>
            <col width="160"/>
            <thead>
              <tr valign="top">
                <td>Topics</td>
                <td>CA<sup>a</sup> (N=4443), n (%)</td>
                <td>FL<sup>b</sup> (N=4668), n (%)</td>
                <td>NY<sup>c</sup> (N=4253), n (%)</td>
                <td>TX<sup>d</sup> (N=3485), n (%)</td>
                <td>Total (N=16,849), n (%)</td>
                <td>Bonferroni-corrected Fisher exact <italic>P</italic> value</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Relationships and sex</td>
                <td>730 (16.43)</td>
                <td>689 (14.76)</td>
                <td>562 (13.21)</td>
                <td>535 (15.35)</td>
                <td>2516 (14.93)</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td>College living</td>
                <td>224 (5.04)</td>
                <td>83 (1.78)</td>
                <td>157 (3.69)</td>
                <td>180 (5.16)</td>
                <td>644 (3.82)</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td>Politics</td>
                <td>133 (2.99)</td>
                <td>122 (2.61)</td>
                <td>317 (7.45)</td>
                <td>35 (1.00)</td>
                <td>607 (3.60)</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td>School and classes</td>
                <td>208 (4.68)</td>
                <td>114 (2.44)</td>
                <td>150 (3.53)</td>
                <td>198 (5.68)</td>
                <td>670 (3.98)</td>
                <td>&#60;.001</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table8fn1">
              <p><sup>a</sup>CA: California.</p>
            </fn>
            <fn id="table8fn2">
              <p><sup>b</sup>FL: Florida.</p>
            </fn>
            <fn id="table8fn3">
              <p><sup>c</sup>NY: New York.</p>
            </fn>
            <fn id="table8fn4">
              <p><sup>d</sup>TX: Texas.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <p>We followed up on these significant effects with Bonferroni-corrected Fisher exact tests for all pairwise comparisons between states for each topic. We found significant differences in the number of college living messages between all states (<italic>P</italic>&#60;.001), except for CA and TX, the 2 states with the most college living messages (<italic>P</italic>=.76). Finally, we found significant differences in the frequency of school-related messages between states (<italic>P</italic>&#60;.001); CA and TX, where school was discussed the most, had the least significant difference (<italic>P</italic>=.04).</p>
      </sec>
      <sec>
        <title>Popularity of Messaging Behaviors</title>
        <p>In this and the following section, we report findings on the popularity of the different messaging behaviors and topics, based on the aggregate of +1 votes (upvotes) and −1 votes (downvotes) each message elicited from Yik Yak users. We identified 1.80% (305/16,966) of the messages as popularity outliers and excluded these from further analysis.</p>
        <p><xref ref-type="table" rid="table9">Table 9</xref> displays the mean popularity scores for the 4 messaging behaviors (seeking help, offering support, bullying, and humor) at the state level (CA, FL, NY, and TX). We submitted the individual message scores to a state × messaging behavior ANOVA. Both main effects were significant: <italic>F</italic><sub>3,1940</sub>=5.11, mean square error (MSE)=4.1, and <italic>P</italic>=.002 for state and <italic>F</italic><sub>3,1940</sub>=25.85, MSE=4.1, and <italic>P</italic>&#60;.001 for messaging behavior. The interaction between the 2 factors was not significant (<italic>F</italic><sub>9,1940</sub>=0.94; MSE=4.1; <italic>P</italic>=.49).</p>
        <table-wrap position="float" id="table9">
          <label>Table 9</label>
          <caption>
            <p>Popularity of messaging behaviors by state.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="200"/>
            <col width="110"/>
            <col width="50"/>
            <col width="110"/>
            <col width="50"/>
            <col width="0"/>
            <col width="110"/>
            <col width="50"/>
            <col width="0"/>
            <col width="110"/>
            <col width="50"/>
            <col width="0"/>
            <col width="110"/>
            <col width="50"/>
            <thead>
              <tr valign="top">
                <td>Messaging behavior</td>
                <td colspan="2">CA<sup>a</sup></td>
                <td colspan="2">FL<sup>b</sup></td>
                <td colspan="3">NY<sup>c</sup></td>
                <td colspan="3">TX<sup>d</sup></td>
                <td colspan="3">Total</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Mean<sup>e</sup> (SE)</td>
                <td>n</td>
                <td>Mean (SE)</td>
                <td colspan="2">n</td>
                <td>Mean (SE)</td>
                <td colspan="2">n</td>
                <td>Mean (SE)</td>
                <td colspan="2">n</td>
                <td>Mean (SE)</td>
                <td>n</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Seeking help</td>
                <td>1.04 (0.26)</td>
                <td>68</td>
                <td>1.37 (0.21)</td>
                <td colspan="2">92</td>
                <td>0.78 (0.30)</td>
                <td colspan="2">63</td>
                <td>0.53 (0.27)</td>
                <td colspan="2">70</td>
                <td>0.97 (0.13)</td>
                <td>293</td>
              </tr>
              <tr valign="top">
                <td>Offering support</td>
                <td>1.00 (0.11)</td>
                <td>182</td>
                <td>0.98 (0.08)</td>
                <td colspan="2">380</td>
                <td>1.22 (0.12)</td>
                <td colspan="2">230</td>
                <td>0.77 (0.16)</td>
                <td colspan="2">88</td>
                <td>1.03 (0.06)</td>
                <td>880</td>
              </tr>
              <tr valign="top">
                <td>Bullying</td>
                <td>0.40 (0.32)</td>
                <td>58</td>
                <td>0.32 (0.17)</td>
                <td colspan="2">68</td>
                <td>0.59 (0.23)</td>
                <td colspan="2">96</td>
                <td>0.32 (0.18)</td>
                <td colspan="2">92</td>
                <td>0.42 (0.11)</td>
                <td>314</td>
              </tr>
              <tr valign="top">
                <td>Humor</td>
                <td>1.50 (0.20)</td>
                <td>124</td>
                <td>1.71 (0.22)</td>
                <td colspan="2">125</td>
                <td>2.14 (0.27)</td>
                <td colspan="2">130</td>
                <td>1.27 (0.20)</td>
                <td colspan="2">90</td>
                <td>1.69 (0.12)</td>
                <td>469</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table9fn1">
              <p><sup>a</sup>CA: California.</p>
            </fn>
            <fn id="table9fn2">
              <p><sup>b</sup>FL: Florida.</p>
            </fn>
            <fn id="table9fn3">
              <p><sup>c</sup>NY: New York.</p>
            </fn>
            <fn id="table9fn4">
              <p><sup>d</sup>TX: Texas.</p>
            </fn>
            <fn id="table9fn5">
              <p><sup>e</sup>Mean: Mean message popularity scores are based on the aggregate number of upvotes (+1) and downvotes (−1) per message.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <p>We used the Tukey range test to determine which states exhibited significantly different mean popularity scores. This analysis revealed that, on average, Yik Yak messages received lower popularity scores in TX than in FL (<italic>P</italic>=.03) and NY (<italic>P</italic>&#60;.001). Additionally, the Tukey test showed that bullying messages were the least popular and differed significantly from messages seeking help (<italic>P</italic>=.003), messages offering support (<italic>P</italic>&#60;.001), or humorous messages (<italic>P</italic>=.001). In contrast, humorous messages were the most popular and scored significantly higher than the other 3 message types (all <italic>P</italic>&#60;.001).</p>
      </sec>
      <sec>
        <title>Popularity of Topics</title>
        <p><xref ref-type="table" rid="table10">Table 10</xref> summarizes the mean popularity scores of messages that discussed 1 of the 4 topics identified through LDA: relationships and sex, college living, politics, or school and classes. A state (CA, FL, NY, and TX) × topic ANOVA revealed main effects of <italic>F</italic><sub>3,4293</sub>=11.23, MSE=4.9, and <italic>P</italic>&#60;.001 for state and <italic>F</italic><sub>3,4293</sub>=7.32, MSE=4.9, and <italic>P</italic>&#60;.001 for the topic as well as a significant state-by-topic interaction of <italic>F</italic><sub>9,4293</sub>=2.52, MSE=4.9, and <italic>P</italic>=.007. We carried out Tukey test to further investigate the significant main effects. We found that TX, the state with the lowest popularity scores overall, differed significantly from CA (<italic>P</italic>&#60;.001), FL (<italic>P</italic>=.03), and NY (<italic>P</italic>&#60;.001). Regarding the popularity of topics, school and classes was a significantly less popular topic than relationships and sex (<italic>P</italic>=.002), college living (<italic>P</italic>=.002), and politics (<italic>P</italic>=.001).</p>
        <table-wrap position="float" id="table10">
          <label>Table 10</label>
          <caption>
            <p>Popularity of topics by state.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="200"/>
            <col width="110"/>
            <col width="50"/>
            <col width="0"/>
            <col width="110"/>
            <col width="50"/>
            <col width="0"/>
            <col width="110"/>
            <col width="50"/>
            <col width="0"/>
            <col width="110"/>
            <col width="50"/>
            <col width="0"/>
            <col width="110"/>
            <col width="50"/>
            <thead>
              <tr valign="top">
                <td>Topic</td>
                <td colspan="3">CA<sup>a</sup></td>
                <td colspan="3">FL<sup>b</sup></td>
                <td colspan="3">NY<sup>c</sup></td>
                <td colspan="3">TX<sup>d</sup></td>
                <td colspan="2">Total</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Mean<sup>e</sup> (SE)</td>
                <td>n</td>
                <td colspan="2">Mean (SE)</td>
                <td>n</td>
                <td colspan="2">Mean (SE)</td>
                <td>n</td>
                <td colspan="2">Mean (SE)</td>
                <td>n</td>
                <td colspan="2">Mean (SE)</td>
                <td>n</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Relationships and sex</td>
                <td>1.56 (0.09)</td>
                <td>700</td>
                <td colspan="2">1.03 (0.08)</td>
                <td>678</td>
                <td colspan="2">1.16 (0.10)</td>
                <td>548</td>
                <td colspan="2">0.96 (0.08)</td>
                <td>528</td>
                <td colspan="2">1.19 (0.05)</td>
                <td>2454</td>
              </tr>
              <tr valign="top">
                <td>College living</td>
                <td>1.31 (0.15)</td>
                <td>209</td>
                <td colspan="2">1.56 (0.26)</td>
                <td>78</td>
                <td colspan="2">1.70 (0.23)</td>
                <td>146</td>
                <td colspan="2">0.78 (0.14)</td>
                <td>175</td>
                <td colspan="2">1.28 (0.09)</td>
                <td>608</td>
              </tr>
              <tr valign="top">
                <td>Politics</td>
                <td>1.17 (0.21)</td>
                <td>129</td>
                <td colspan="2">1.46 (0.24)</td>
                <td>119</td>
                <td colspan="2">1.34 (0.14)</td>
                <td>314</td>
                <td colspan="2">1.49 (0.43)</td>
                <td>35</td>
                <td colspan="2">1.34 (0.10)</td>
                <td>597</td>
              </tr>
              <tr valign="top">
                <td>School and classes</td>
                <td>0.84 (0.12)</td>
                <td>197</td>
                <td colspan="2">1.09 (0.20)</td>
                <td>114</td>
                <td colspan="2">1.08 (0.18)</td>
                <td>145</td>
                <td colspan="2">0.43 (0.09)</td>
                <td>194</td>
                <td colspan="2">0.82 (0.07)</td>
                <td>650</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table10fn1">
              <p><sup>a</sup>CA: California.</p>
            </fn>
            <fn id="table10fn2">
              <p><sup>b</sup>FL: Florida.</p>
            </fn>
            <fn id="table10fn3">
              <p><sup>c</sup>NY: New York.</p>
            </fn>
            <fn id="table10fn4">
              <p><sup>d</sup>TX: Texas.</p>
            </fn>
            <fn id="table10fn5">
              <p><sup>e</sup>Mean: Mean message popularity scores are based on the aggregate number of upvotes (+1) and downvotes (−1) per message.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <p>The significant state-by-topic interaction indicates that states differ with respect to the relative popularity of topics. To identify patterns of topic popularity within each state, we conducted ANOVAs with topic as a single factor, separately for each state. These ANOVAs yielded a significant effect of topic for CA (<italic>F</italic><sub>3,1231</sub>=5.36; MSE=5.39; <italic>P</italic>=.001) and TX (<italic>F</italic><sub>3,928</sub>=5.84; MSE=3.17; <italic>P</italic>&#60;.001) but not for FL (<italic>F</italic><sub>3,985</sub>=2.41; MSE=4.91; <italic>P</italic>=.07) or NY (<italic>F</italic><sub>3,1149</sub>=2.34; MSE=5.7; <italic>P</italic>=.07). We followed up on the significant effects for CA and TX using the Tukey test. In CA, school and classes was a less popular topic than relationships and sex (<italic>P</italic>&#60;.001). In TX, messages about school and classes were less popular than messages about relationships (<italic>P</italic>=.002) and politics (<italic>P</italic>=.009).</p>
      </sec>
      <sec>
        <title>Interplay Between Variables</title>
        <p>We summarize the intercorrelations between the frequency of prosocial messages in which users sought help or offered support, the frequency of bullying messages, the popularity of these messaging behaviors, the frequency of topics, and school enrollment and ranking in <xref ref-type="table" rid="table11">Table 11</xref>. These correlations are based on 19 schools, except for correlations involving the variable <italic>ranking</italic>, for which n=18.</p>
        <p>We found that schools with a greater frequency of help-seeking messages also exhibited a greater frequency of messages offering support (<italic>P</italic>=.04). Campuses where students posted less about relationships and sex sent more messages offering support (<italic>P</italic>=.002). Moreover, messages offering support were more frequent at higher-ranking schools (<italic>P</italic>=.006). Bullying occurred more often on campuses where users posted more about politics (<italic>P</italic>=.048) and where messages seeking help were popular (<italic>P</italic>=.02). Messages offering support were more popular at campuses where students posted more about classes (<italic>P</italic>=.04). Finally, we found that the frequency of messages about college living was positively related to the frequency of messages about classes (<italic>P</italic>=.04) but negatively related to the number of enrolled students (<italic>P</italic>=.05). The remaining correlations in <xref ref-type="table" rid="table11">Table 11</xref> were not statistically significant.</p>
        <table-wrap position="float" id="table11">
          <label>Table 11</label>
          <caption>
            <p>Intercorrelations at the school level.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="160"/>
            <col width="70"/>
            <col width="70"/>
            <col width="70"/>
            <col width="70"/>
            <col width="70"/>
            <col width="70"/>
            <col width="70"/>
            <col width="70"/>
            <col width="70"/>
            <col width="70"/>
            <col width="70"/>
            <col width="70"/>
            <thead>
              <tr valign="top">
                <td>Variable</td>
                <td>SH<sup>a</sup></td>
                <td>OS<sup>b</sup></td>
                <td>BU<sup>c</sup></td>
                <td>PH<sup>d</sup></td>
                <td>PS<sup>e</sup></td>
                <td>PB<sup>f</sup></td>
                <td>RS<sup>g</sup></td>
                <td>CL<sup>h</sup></td>
                <td>PO<sup>i</sup></td>
                <td>SC<sup>j</sup></td>
                <td>EN<sup>k</sup></td>
                <td>RA<sup>l</sup></td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>SH</td>
                <td>—<sup>m</sup></td>
                <td>0.48</td>
                <td>−0.13</td>
                <td>−0.06</td>
                <td>0.37</td>
                <td>0.01</td>
                <td>−0.35</td>
                <td>0.01</td>
                <td>−0.38</td>
                <td>0.36</td>
                <td>0.17</td>
                <td>−0.29</td>
              </tr>
              <tr valign="top">
                <td>OS</td>
                <td>—<sup>n</sup></td>
                <td>—<sup>m</sup></td>
                <td>−0.33</td>
                <td>0.16</td>
                <td>0.00</td>
                <td>0.05</td>
                <td>−0.66</td>
                <td>−0.30</td>
                <td>0.07</td>
                <td>−0.08</td>
                <td>0.20</td>
                <td>−0.62</td>
              </tr>
              <tr valign="top">
                <td>BU</td>
                <td>—<sup>n</sup></td>
                <td>—<sup>n</sup></td>
                <td>—<sup>m</sup></td>
                <td>0.52</td>
                <td>0.37</td>
                <td>−0.35</td>
                <td>0.36</td>
                <td>0.01</td>
                <td>0.46</td>
                <td>−0.07</td>
                <td>−0.07</td>
                <td>0.10</td>
              </tr>
              <tr valign="top">
                <td>PH</td>
                <td>—<sup>n</sup></td>
                <td>—<sup>n</sup></td>
                <td>—<sup>n</sup></td>
                <td>—<sup>m</sup></td>
                <td>0.37</td>
                <td>−0.02</td>
                <td>0.19</td>
                <td>−0.03</td>
                <td>0.30</td>
                <td>−0.11</td>
                <td>0.09</td>
                <td>−0.21</td>
              </tr>
              <tr valign="top">
                <td>PS</td>
                <td>—<sup>n</sup></td>
                <td>—<sup>n</sup></td>
                <td>—<sup>n</sup></td>
                <td>—<sup>n</sup></td>
                <td>—<sup>m</sup></td>
                <td>−0.18</td>
                <td>0.26</td>
                <td>0.19</td>
                <td>0.16</td>
                <td>0.47</td>
                <td>−0.15</td>
                <td>−0.17</td>
              </tr>
              <tr valign="top">
                <td>PB</td>
                <td>—<sup>n</sup></td>
                <td>—<sup>n</sup></td>
                <td>—<sup>n</sup></td>
                <td>—<sup>n</sup></td>
                <td>—<sup>n</sup></td>
                <td>—<sup>m</sup></td>
                <td>−0.20</td>
                <td>−0.11</td>
                <td>0.13</td>
                <td>0.03</td>
                <td>−0.21</td>
                <td>−0.08</td>
              </tr>
              <tr valign="top">
                <td>RS</td>
                <td>—<sup>n</sup></td>
                <td>—<sup>n</sup></td>
                <td>—<sup>n</sup></td>
                <td>—<sup>n</sup></td>
                <td>—<sup>n</sup></td>
                <td>—<sup>n</sup></td>
                <td>—<sup>m</sup></td>
                <td>0.09</td>
                <td>−0.09</td>
                <td>−0.02</td>
                <td>0.09</td>
                <td>0.29</td>
              </tr>
              <tr valign="top">
                <td>CL</td>
                <td>—<sup>n</sup></td>
                <td>—<sup>n</sup></td>
                <td>—<sup>n</sup></td>
                <td>—<sup>n</sup></td>
                <td>—<sup>n</sup></td>
                <td>—<sup>n</sup></td>
                <td>—<sup>n</sup></td>
                <td>—<sup>m</sup></td>
                <td>−0.14</td>
                <td>0.47</td>
                <td>−0.45</td>
                <td>0.29</td>
              </tr>
              <tr valign="top">
                <td>PO</td>
                <td>—<sup>n</sup></td>
                <td>—<sup>n</sup></td>
                <td>—<sup>n</sup></td>
                <td>—<sup>n</sup></td>
                <td>—<sup>n</sup></td>
                <td>—<sup>n</sup></td>
                <td>—<sup>n</sup></td>
                <td>—<sup>n</sup></td>
                <td>—<sup>m</sup></td>
                <td>−0.19</td>
                <td>−0.27</td>
                <td>−0.35</td>
              </tr>
              <tr valign="top">
                <td>SC</td>
                <td>—<sup>n</sup></td>
                <td>—<sup>n</sup></td>
                <td>—<sup>n</sup></td>
                <td>—<sup>n</sup></td>
                <td>—<sup>n</sup></td>
                <td>—<sup>n</sup></td>
                <td>—<sup>n</sup></td>
                <td>—<sup>n</sup></td>
                <td>—<sup>n</sup></td>
                <td>—<sup>m</sup></td>
                <td>−0.26</td>
                <td>−0.01</td>
              </tr>
              <tr valign="top">
                <td>EN</td>
                <td>—<sup>n</sup></td>
                <td>—<sup>n</sup></td>
                <td>—<sup>n</sup></td>
                <td>—<sup>n</sup></td>
                <td>—<sup>n</sup></td>
                <td>—<sup>n</sup></td>
                <td>—<sup>n</sup></td>
                <td>—<sup>n</sup></td>
                <td>—<sup>n</sup></td>
                <td>—<sup>n</sup></td>
                <td>—<sup>m</sup></td>
                <td>−0.33</td>
              </tr>
              <tr valign="top">
                <td>RA</td>
                <td>—<sup>n</sup></td>
                <td>—<sup>n</sup></td>
                <td>—<sup>n</sup></td>
                <td>—<sup>n</sup></td>
                <td>—<sup>n</sup></td>
                <td>—<sup>n</sup></td>
                <td>—<sup>n</sup></td>
                <td>—<sup>n</sup></td>
                <td>—<sup>n</sup></td>
                <td>—<sup>n</sup></td>
                <td>—<sup>n</sup></td>
                <td>—<sup>m</sup></td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table11fn1">
              <p><sup>a</sup>SH: seeking help.</p>
            </fn>
            <fn id="table11fn2">
              <p><sup>b</sup>OS: offering support.</p>
            </fn>
            <fn id="table11fn3">
              <p><sup>c</sup>BU: bullying.</p>
            </fn>
            <fn id="table11fn4">
              <p><sup>d</sup>PH: popularity of seeking help.</p>
            </fn>
            <fn id="table11fn5">
              <p><sup>e</sup>PS: popularity of offering support.</p>
            </fn>
            <fn id="table11fn6">
              <p><sup>f</sup>PB: popularity of bullying.</p>
            </fn>
            <fn id="table11fn7">
              <p><sup>g</sup>RS: relationships and sex.</p>
            </fn>
            <fn id="table11fn8">
              <p><sup>h</sup>CL: college living.</p>
            </fn>
            <fn id="table11fn9">
              <p><sup>i</sup>PO: politics.</p>
            </fn>
            <fn id="table11fn10">
              <p><sup>j</sup>SC: school and classes.</p>
            </fn>
            <fn id="table11fn11">
              <p><sup>k</sup>EN: enrollment.</p>
            </fn>
            <fn id="table11fn12">
              <p><sup>l</sup>RA: ranking.</p>
            </fn>
            <fn id="table11fn13">
              <p><sup>m</sup>Cells along the diagonal represent the same variable in both row and column, thus no correlation is reported.</p>
            </fn>
            <fn id="table11fn14">
              <p><sup>n</sup>Cells below the diagonal duplicate those above the diagonal and are left blank for clarity.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Classification Results</title>
        <p><xref ref-type="table" rid="table12">Tables 12</xref> and <xref ref-type="table" rid="table13">13</xref> summarize the results of our trained classifiers on the test data. As accuracy can be misleadingly high for imbalanced datasets, we also report balanced accuracy. Using this metric, we see that SVM has the best performance on 5 messaging behaviors and topics (offering support, bullying, relationships and sex, politics, and school and classes), with a balanced accuracy of over 0.75 on all but the humor dataset and an average balanced accuracy of 0.7827. CNN was the second-best performer, with the best performance on humor and college living and an average balanced accuracy of 0.7645.</p>
        <table-wrap position="float" id="table12">
          <label>Table 12</label>
          <caption>
            <p>Messaging behavior classification results.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="400"/>
            <col width="160"/>
            <col width="180"/>
            <col width="120"/>
            <col width="110"/>
            <thead>
              <tr valign="top">
                <td colspan="2">Metric and classifier</td>
                <td>Seeking help</td>
                <td>Offering support</td>
                <td>Bullying</td>
                <td>Humor</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="6">
                  <bold>Accuracy</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Random forest</td>
                <td>
                  <italic>0.9269</italic>
                  <sup>a</sup>
                </td>
                <td>
                  <italic>0.8120</italic>
                </td>
                <td>
                  <italic>0.9299</italic>
                </td>
                <td>0.6417</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>SVM<sup>b</sup></td>
                <td>0.6771</td>
                <td>0.7501</td>
                <td>0.9240</td>
                <td>
                  <italic>0.8385</italic>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>CNN<sup>c</sup></td>
                <td>0.9098</td>
                <td>0.6618</td>
                <td>0.9146</td>
                <td>0.7195</td>
              </tr>
              <tr valign="top">
                <td colspan="6">
                  <bold>Balanced accuracy</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Random forest</td>
                <td>
                  <italic>0.8575</italic>
                </td>
                <td>0.7151</td>
                <td>0.6763</td>
                <td>0.6392</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>SVM</td>
                <td>0.8007</td>
                <td>
                  <italic>0.7514</italic>
                </td>
                <td>
                  <italic>0.7750</italic>
                </td>
                <td>0.6543</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>CNN</td>
                <td>0.6557</td>
                <td>0.7313</td>
                <td>0.7702</td>
                <td>
                  <italic>0.6942</italic>
                </td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table12fn1">
              <p><sup>a</sup>The highest accuracy and balanced accuracy achieved for each messaging behavior are italicized for emphasis.</p>
            </fn>
            <fn id="table12fn2">
              <p><sup>b</sup>SVM: support vector machine.</p>
            </fn>
            <fn id="table12fn3">
              <p><sup>c</sup>CNN: convolutional neural network.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <table-wrap position="float" id="table13">
          <label>Table 13</label>
          <caption>
            <p>Topic classification results.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="400"/>
            <col width="0"/>
            <col width="160"/>
            <col width="0"/>
            <col width="180"/>
            <col width="0"/>
            <col width="120"/>
            <col width="0"/>
            <col width="110"/>
            <thead>
              <tr valign="top">
                <td colspan="3">Metric and classifier</td>
                <td colspan="2">Relationships and sex</td>
                <td colspan="2">College living</td>
                <td colspan="2">Politics</td>
                <td>School and classes</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="10">
                  <bold>Accuracy</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Random forest</td>
                <td colspan="2">0.8209</td>
                <td colspan="2">
                  <italic>0.9028</italic>
                  <sup>a</sup>
                </td>
                <td colspan="2">0.8704</td>
                <td colspan="2">0.9387</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>SVM<sup>b</sup></td>
                <td colspan="2">
                  <italic>0.8521</italic>
                </td>
                <td colspan="2">0.8981</td>
                <td colspan="2">
                  <italic>0.9405</italic>
                </td>
                <td colspan="2">
                  <italic>0.9499</italic>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>CNN<sup>c</sup></td>
                <td colspan="2">0.7943</td>
                <td colspan="2">0.8533</td>
                <td colspan="2">0.9399</td>
                <td colspan="2">0.9010</td>
              </tr>
              <tr valign="top">
                <td colspan="10">
                  <bold>Balanced accuracy</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Random forest</td>
                <td colspan="2">0.7380</td>
                <td colspan="2">0.7323</td>
                <td colspan="2">0.7775</td>
                <td colspan="2">0.7899</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>SVM</td>
                <td colspan="2">
                  <italic>0.8145</italic>
                </td>
                <td colspan="2">0.7842</td>
                <td colspan="2">
                  <italic>0.8605</italic>
                </td>
                <td colspan="2">
                  <italic>0.8212</italic>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>CNN</td>
                <td colspan="2">0.7902</td>
                <td colspan="2">
                  <italic>0.8075</italic>
                </td>
                <td colspan="2">0.8524</td>
                <td colspan="2">0.8147</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table13fn1">
              <p><sup>a</sup>The highest accuracy and balanced accuracy achieved for each topic are italicized for emphasis.</p>
            </fn>
            <fn id="table13fn2">
              <p><sup>b</sup>SVM: support vector machine.</p>
            </fn>
            <fn id="table13fn3">
              <p><sup>c</sup>CNN: convolutional neural network.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>Owing to the growing popularity of social media across all segments of society, researchers have a plethora of data sources from which they can derive new insights about people’s social and health-related attitudes, behaviors, and beliefs. The ability to observe social media users in near real time holds particular promise in the domain of public health and health care, where rapid detection of health-relevant events and timely intervention are essential. This study aimed to explore the prevalence of information pertaining to college students’ health and well-being contained in their conversations on an anonymous social network. To this end, we analyzed the frequency and popularity of prosocial messages and bullying messages as well as the frequency and popularity of topics discussed on the web.</p>
        <p>In our dataset, prosocial messages (seeking help, offering support, and humor) appeared more frequently than bullying messages (1735/16,966, 10.23% vs 332/16,966, 1.96%), and there were significant regional differences in the frequency of messages associated with support or bullying. Notably, Yik Yak users attending TX colleges sent the fewest supportive messages and the most bullying messages. We should interpret this finding with caution in light of the relatively small number of messages and universities considered for our study. Nevertheless, this finding highlights a potentially problematic pattern of social media use among college students that future research may link to adverse health outcomes. Unsurprisingly, bullying messages were the least popular, and humorous messages were the most popular among Yik Yak users, independent of the state in which they lived.</p>
        <p>To identify the topics of Yik Yak messages, we relied on statistical modeling as an alternative to the subjective classification scheme recently used by Black et al [<xref ref-type="bibr" rid="ref18">18</xref>]. A subsequent analysis of topic prevalence revealed that relationships and sex was the most frequently discussed topic among college students. School and classes turned out to be the least popular topic, as measured by the number of upvotes and downvotes a message received. From an intervention point of view, regional differences in topic frequency and popularity matter because they offer campus representatives and health professionals clues on how to best engage a student population, both on the web and offline. Although the relative popularity of topics was similar across states, we found greater regional variation in the relative frequency of topics. For example, 7.44% (318/4273) of Yik Yak messages in the state of NY discussed politics compared with only 1.00% (35/3503) in TX, and college living was addressed in 5.60% (252/4496) of messages in CA but in only 2.28% (107/4694) of messages in FL.</p>
        <p>With our final correlational analysis, we wanted to learn more about factors that promote prosocial web-based behaviors and prevent cyberbullying at US colleges. Several findings are worth noting. At schools where students often sought help through messages, messages offering support were also more frequent. We speculate that students may offer support in response to requests for help, but the reverse relationship is also conceivable: at schools where support is offered frequently, students may feel encouraged to ask for help. A higher prevalence of supportive messages also appears to be a characteristic of higher-ranking universities. Although the <italic>Wall Street Journal</italic>/Times Higher Education’s college rankings [<xref ref-type="bibr" rid="ref23">23</xref>] do not take into account social support between students, some hidden factors that lead to a higher prevalence of social support may have also been indirectly captured by their methodology. Our observation of a positive relationship between the popularity of messages offering support and the frequency of the school and classes topic may be explained by a positive response, in the form of upvotes, to support offered to students expressing frustrations with coursework and exams. It is more difficult to interpret why messages of support were sent more often at schools where relationships and sex were discussed less frequently. This requires further investigation.</p>
        <p>Two results speak directly to the frequency of cyberbullying on college campuses. First, there was a positive relationship between bullying and the popularity of messages seeking help. One interpretation for this finding is that students react prosocially to a higher prevalence of bullying by encouraging help-seeking behavior, although they did not appear to actually offer more support (the correlation between the frequency of supporting and bullying messages was negative and not significant). An alternative hypothesis is that certain prosocial messaging behaviors can trigger cyberbullying. Additionally, students at schools with a higher incidence of bullying frequently discussed politics. This result is unsurprising given the often-heated nature of political discussions.</p>
        <p>Of the results regarding the frequency of messages about college living, the positive relationship with the frequency of messages about classes is understandable, given that these 2 topics reflect much of the college experience. However, messages about college living are less frequent at schools with higher enrollment rates. One possible explanation may be that larger schools have less on-campus housing relative to the number of students, but further study is necessary to make this determination.</p>
        <p>Our text classification experiments demonstrate the feasibility of automatic classification of the messaging behaviors and topics in this study. The balanced accuracy of the SVM classifier on the test data was reasonably high for most messaging behaviors and topics. Its worst performance was with the humor dataset, which also had the lowest balanced accuracy with the random forest classifier and the second lowest balanced accuracy with the CNN classifier. This may be because of the complexity of humor—forms of humor such as innuendo, sarcasm, and satire may be difficult for a machine learning algorithm to identify.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>This study has strong implications for education, public health, and broader fields of health care. Educators could use similar methods to find topics that may be engaging to students on campus. In particular, campus administrators and health service units could identify topic areas where students could engage in a campus-wide dialogue. This could also be helpful for public health professionals because it would provide insight into campus conversations that lead to bullying or hostility. Educators and clinicians could work together to foster a healthier dialogue around the subject and encourage a campus culture of reaching out to fellow students to offer support. In addition to gaining insights into conversations on college campuses, this study represents a first step in guiding research focused on anonymous social networks. The results of this study can help promote the labeling and mining of social data to help students, parents, administrators, and health care workers identify cyberbullying and design interventions to stop it.</p>
        <p>This type of work naturally presents opportunities for computer scientists working in health services as well. Mining data from anonymous social networks can extend beyond the college campus and to the public. Computer scientists can design tools to mine and categorize public social data and help create an even farther-reaching monitoring system for educators and public health professionals [<xref ref-type="bibr" rid="ref36">36</xref>].</p>
        <p>The major limitations of this study include the small number of colleges and universities considered, the lack of ability to generalize as Yik Yak has closed down since this study was conducted, the modest number of Yik Yak messages per school, and the relatively small number of classifier hyperparameters evaluated. We, therefore, caution against generalizing our findings until they can be replicated with larger samples and on other anonymous social networks. The main intention of this study was to understand students’ web-based behaviors and interests from their messages on an anonymous social network and, more specifically, to garner initial insight into conditions affecting prosocial and antisocial uses of social media that could be integrated into health services. We believe that the findings reported here can be a stepping stone to further research on this topic as well as differences in health behaviors and risks communicated on anonymous social networks vs nonanonymous social networks.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group/>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">ANOVA</term>
          <def>
            <p>analysis of variance</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">CA</term>
          <def>
            <p>California</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">CNN</term>
          <def>
            <p>convolutional neural network</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">FL</term>
          <def>
            <p>Florida</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">GloVe</term>
          <def>
            <p>Global vectors for word representation</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">LDA</term>
          <def>
            <p>latent Dirichlet allocation</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">MSE</term>
          <def>
            <p>mean square error</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">NY</term>
          <def>
            <p>New York</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">SVM</term>
          <def>
            <p>support vector machine</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb10">TF-IDF</term>
          <def>
            <p>term frequency-inverse document frequency</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb11">TX</term>
          <def>
            <p>Texas</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <fn-group>
      <fn fn-type="con">
        <p>RR performed text classification experiments, determined statistical characteristics of messaging behaviors and topics, and assisted in writing the manuscript. MS collected and analyzed the data and wrote part of the manuscript. RG worked on the analysis and writing. VH led the technical research. SY led the problem formulation and discussion.</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Garett</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Young</surname>
              <given-names>SD</given-names>
            </name>
          </person-group>
          <article-title>A longitudinal analysis of stress among incoming college freshmen</article-title>
          <source>J Am Coll Health</source>
          <year>2017</year>
          <month>07</month>
          <volume>65</volume>
          <issue>5</issue>
          <fpage>331</fpage>
          <lpage>8</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/28362146"/>
          </comment>
          <pub-id pub-id-type="doi">10.1080/07448481.2017.1312413</pub-id>
          <pub-id pub-id-type="medline">28362146</pub-id>
          <pub-id pub-id-type="pmcid">PMC6122850</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Vail-Smith</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Felts</surname>
              <given-names>WM</given-names>
            </name>
            <name name-style="western">
              <surname>Becker</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Relationship between sleep quality and health risk behaviors in undergraduate college students</article-title>
          <source>Coll Stud J</source>
          <year>2009</year>
          <month>09</month>
          <day>1</day>
          <volume>43</volume>
          <issue>3</issue>
          <fpage>924</fpage>
          <lpage>31</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.researchgate.net/profile/Craig_Becker/publication/235968091_Relationship_between_sleep_quality_and_health_risk_behaviors_in_undergraduate_college_students/links/55e04ece08aede0b572d5437.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Galambos</surname>
              <given-names>NL</given-names>
            </name>
            <name name-style="western">
              <surname>Dalton</surname>
              <given-names>AL</given-names>
            </name>
            <name name-style="western">
              <surname>Maggs</surname>
              <given-names>JL</given-names>
            </name>
          </person-group>
          <article-title>Losing sleep over it: daily variation in sleep quantity and quality in Canadian students</article-title>
          <source>J Res Adolesc</source>
          <year>2009</year>
          <month>12</month>
          <volume>19</volume>
          <issue>4</issue>
          <fpage>741</fpage>
          <lpage>61</lpage>
          <pub-id pub-id-type="doi">10.1111/j.1532-7795.2009.00618.x</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Williams</surname>
              <given-names>PG</given-names>
            </name>
            <name name-style="western">
              <surname>Moroz</surname>
              <given-names>TL</given-names>
            </name>
          </person-group>
          <article-title>Personality vulnerability to stress-related sleep disruption: pathways to adverse mental and physical health outcomes</article-title>
          <source>Pers Individ Differ</source>
          <year>2009</year>
          <month>04</month>
          <day>1</day>
          <volume>46</volume>
          <issue>5-6</issue>
          <fpage>598</fpage>
          <lpage>603</lpage>
          <pub-id pub-id-type="doi">10.1016/j.paid.2008.12.017</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Roane</surname>
              <given-names>BM</given-names>
            </name>
            <name name-style="western">
              <surname>Seifer</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Sharkey</surname>
              <given-names>KM</given-names>
            </name>
            <name name-style="western">
              <surname>van Reen</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Bond</surname>
              <given-names>TL</given-names>
            </name>
            <name name-style="western">
              <surname>Raffray</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Carskadon</surname>
              <given-names>MA</given-names>
            </name>
          </person-group>
          <article-title>What role does sleep play in weight gain in the first semester of university?</article-title>
          <source>Behav Sleep Med</source>
          <year>2015</year>
          <month>11</month>
          <day>2</day>
          <volume>13</volume>
          <issue>6</issue>
          <fpage>491</fpage>
          <lpage>505</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/25115969"/>
          </comment>
          <pub-id pub-id-type="doi">10.1080/15402002.2014.940109</pub-id>
          <pub-id pub-id-type="medline">25115969</pub-id>
          <pub-id pub-id-type="pmcid">PMC4892182</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Curcio</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Ferrara</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>de Gennaro</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Sleep loss, learning capacity and academic performance</article-title>
          <source>Sleep Med Rev</source>
          <year>2006</year>
          <month>10</month>
          <day>1</day>
          <volume>10</volume>
          <issue>5</issue>
          <fpage>323</fpage>
          <lpage>37</lpage>
          <pub-id pub-id-type="doi">10.1016/j.smrv.2005.11.001</pub-id>
          <pub-id pub-id-type="medline">16564189</pub-id>
          <pub-id pub-id-type="pii">S1087-0792(05)00123-1</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bauman</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Toomey</surname>
              <given-names>RB</given-names>
            </name>
            <name name-style="western">
              <surname>Walker</surname>
              <given-names>JL</given-names>
            </name>
          </person-group>
          <article-title>Associations among bullying, cyberbullying, and suicide in high school students</article-title>
          <source>J Adolesc</source>
          <year>2013</year>
          <month>04</month>
          <day>1</day>
          <volume>36</volume>
          <issue>2</issue>
          <fpage>341</fpage>
          <lpage>50</lpage>
          <pub-id pub-id-type="doi">10.1016/j.adolescence.2012.12.001</pub-id>
          <pub-id pub-id-type="medline">23332116</pub-id>
          <pub-id pub-id-type="pii">S0140-1971(12)00181-9</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gámez-Guadix</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Orue</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Smith</surname>
              <given-names>PK</given-names>
            </name>
            <name name-style="western">
              <surname>Calvete</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Longitudinal and reciprocal relations of cyberbullying with depression, substance use, and problematic internet use among adolescents</article-title>
          <source>J Adolesc Health</source>
          <year>2013</year>
          <month>10</month>
          <day>1</day>
          <volume>53</volume>
          <issue>4</issue>
          <fpage>446</fpage>
          <lpage>52</lpage>
          <pub-id pub-id-type="doi">10.1016/j.jadohealth.2013.03.030</pub-id>
          <pub-id pub-id-type="medline">23721758</pub-id>
          <pub-id pub-id-type="pii">S1054-139X(13)00214-0</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Nansel</surname>
              <given-names>TR</given-names>
            </name>
            <name name-style="western">
              <surname>Iannotti</surname>
              <given-names>RJ</given-names>
            </name>
          </person-group>
          <article-title>Cyber and traditional bullying: differential association with depression</article-title>
          <source>J Adolesc Health</source>
          <year>2011</year>
          <month>04</month>
          <day>1</day>
          <volume>48</volume>
          <issue>4</issue>
          <fpage>415</fpage>
          <lpage>7</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/21402273"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jadohealth.2010.07.012</pub-id>
          <pub-id pub-id-type="medline">21402273</pub-id>
          <pub-id pub-id-type="pii">S1054-139X(10)00343-5</pub-id>
          <pub-id pub-id-type="pmcid">PMC3058261</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Young</surname>
              <given-names>SD</given-names>
            </name>
          </person-group>
          <article-title>Behavioral insights on big data: using social media for predicting biomedical outcomes</article-title>
          <source>Trends Microbiol</source>
          <year>2014</year>
          <month>11</month>
          <day>1</day>
          <volume>22</volume>
          <issue>11</issue>
          <fpage>601</fpage>
          <lpage>2</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/25438614"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.tim.2014.08.004</pub-id>
          <pub-id pub-id-type="medline">25438614</pub-id>
          <pub-id pub-id-type="pii">S0966-842X(14)00160-7</pub-id>
          <pub-id pub-id-type="pmcid">PMC4364914</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Young</surname>
              <given-names>SD</given-names>
            </name>
          </person-group>
          <article-title>A 'big data' approach to HIV epidemiology and prevention</article-title>
          <source>Prev Med</source>
          <year>2015</year>
          <month>01</month>
          <volume>70</volume>
          <fpage>17</fpage>
          <lpage>8</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/25449693"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.ypmed.2014.11.002</pub-id>
          <pub-id pub-id-type="medline">25449693</pub-id>
          <pub-id pub-id-type="pii">S0091-7435(14)00402-2</pub-id>
          <pub-id pub-id-type="pmcid">PMC4364912</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Young</surname>
              <given-names>SD</given-names>
            </name>
            <name name-style="western">
              <surname>Rivers</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Lewis</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Methods of using real-time social media technologies for detection and remote monitoring of HIV outcomes</article-title>
          <source>Prev Med</source>
          <year>2014</year>
          <month>06</month>
          <day>1</day>
          <volume>63</volume>
          <fpage>112</fpage>
          <lpage>5</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/24513169"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.ypmed.2014.01.024</pub-id>
          <pub-id pub-id-type="medline">24513169</pub-id>
          <pub-id pub-id-type="pii">S0091-7435(14)00055-3</pub-id>
          <pub-id pub-id-type="pmcid">PMC4031268</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>De Choudhury</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Gamon</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Counts</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Horvitz</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Predicting Depression via Social Media</article-title>
          <source>Proceedings of the Seventh International AAAI Conference on Weblogs and Social Media</source>
          <year>2013</year>
          <conf-name>AAAI'13</conf-name>
          <conf-date>July 8-11, 2013</conf-date>
          <conf-loc>Cambridge, MA</conf-loc>
          <fpage>128</fpage>
          <lpage>37</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.aaai.org/ocs/index.php/ICWSM/ICWSM13/paper/view/6124/6351"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mahler</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <source>The New York Times</source>
          <year>2015</year>
          <month>03</month>
          <day>9</day>
          <access-date>2016-11-21</access-date>
          <comment>Who Spewed That Abuse? Anonymous Yik Yak App Isn’t Telling<ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.nytimes.com/2015/03/09/technology/popular-yik-yak-app-confers-anonymity-and-delivers-abuse.html">http://www.nytimes.com/2015/03/09/technology/popular-yik-yak-app-confers-anonymity-and-delivers-abuse.html</ext-link>
                                                </comment>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Shontell</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <source>Business Insider</source>
          <year>2015</year>
          <month>03</month>
          <day>28</day>
          <access-date>2016-11-22</access-date>
          <comment>Why a Girl Who Was Viciously Bullied on Yik Yak Now Believes in the Anonymous App's Future<ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.businessinsider.com/elizabeth-long-was-bullied-on-yik-yak-2015-3">http://www.businessinsider.com/elizabeth-long-was-bullied-on-yik-yak-2015-3</ext-link>
                                                </comment>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Valencia</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <source>CNN</source>
          <year>2014</year>
          <month>03</month>
          <day>7</day>
          <access-date>2016-11-14</access-date>
          <comment>Yik Yak Chat App Stirring Up Trouble in High Schools<ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.cnn.com/2014/03/07/tech/yik-yak-app-high-school-problems/">https://www.cnn.com/2014/03/07/tech/yik-yak-app-high-school-problems/</ext-link>
                                                </comment>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Nika</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Zheng</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>BY</given-names>
            </name>
          </person-group>
          <article-title>Whispers in the Dark: Analysis of an Anonymous Social Network</article-title>
          <source>Proceedings of the 2014 Conference on Internet Measurement Conference</source>
          <year>2014</year>
          <conf-name>IMC'14</conf-name>
          <conf-date>November 5-7, 2014</conf-date>
          <conf-loc>Vancouver, Canada</conf-loc>
          <pub-id pub-id-type="doi">10.1145/2663716.2663728</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Black</surname>
              <given-names>EW</given-names>
            </name>
            <name name-style="western">
              <surname>Mezzina</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Thompson</surname>
              <given-names>LA</given-names>
            </name>
          </person-group>
          <article-title>Anonymous social media–understanding the content and context of Yik Yak</article-title>
          <source>Comput Hum Behav</source>
          <year>2016</year>
          <month>04</month>
          <day>1</day>
          <volume>57</volume>
          <fpage>17</fpage>
          <lpage>22</lpage>
          <pub-id pub-id-type="doi">10.1016/j.chb.2015.11.043</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nova</surname>
              <given-names>FF</given-names>
            </name>
            <name name-style="western">
              <surname>Rifat</surname>
              <given-names>MR</given-names>
            </name>
            <name name-style="western">
              <surname>Saha</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Ahmed</surname>
              <given-names>SI</given-names>
            </name>
            <name name-style="western">
              <surname>Guha</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Online Sexual Harassment Over Anonymous Social Media in Bangladesh</article-title>
          <source>Proceedings of the Tenth International Conference on Information and Communication Technologies and Development</source>
          <year>2019</year>
          <conf-name>ICTD'19</conf-name>
          <conf-date>January 4-7, 2019</conf-date>
          <conf-loc>Ahmedabad, India</conf-loc>
          <pub-id pub-id-type="doi">10.1145/3287098.3287107</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gerhart</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Koohikamali</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Social network migration and anonymity expectations: what anonymous social network apps offer</article-title>
          <source>Comput Hum Behav</source>
          <year>2019</year>
          <month>06</month>
          <day>1</day>
          <volume>95</volume>
          <fpage>101</fpage>
          <lpage>13</lpage>
          <pub-id pub-id-type="doi">10.1016/j.chb.2019.01.030</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Batson</surname>
              <given-names>CD</given-names>
            </name>
            <name name-style="western">
              <surname>Powell</surname>
              <given-names>AA</given-names>
            </name>
          </person-group>
          <article-title>Altruism and prosocial behavior</article-title>
          <source>Handbook of Psychology</source>
          <year>2003</year>
          <publisher-loc>Hoboken, NJ</publisher-loc>
          <publisher-name>John Wiley &#38; Sons</publisher-name>
          <fpage>463</fpage>
          <lpage>84</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Drouin</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Reining</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Flanagan</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Carpenter</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Toscos</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>College students in distress: can social media be a source of social support?</article-title>
          <source>Coll Stud J</source>
          <year>2018</year>
          <month>12</month>
          <day>1</day>
          <volume>52</volume>
          <issue>4</issue>
          <fpage>494</fpage>
          <lpage>504</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.questia.com/library/journal/1G1-572402027/college-students-in-distress-can-social-media-be"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="web">
          <source>Times Higher Education</source>
          <year>2016</year>
          <access-date>2016-12-09</access-date>
          <comment>Wall Street Journal/Times Higher Education College Rankings 2017<ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.timeshighereducation.com/rankings/united-states/2017">http://www.timeshighereducation.com/rankings/united-states/2017</ext-link>
                                                </comment>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="web">
          <source>The University of Texas Rio Grande Valley</source>
          <year>2016</year>
          <access-date>2016-12-09</access-date>
          <comment>Office of Strategic Analysis and Institutional Reporting (SAIR)<ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.utrgv.edu/sair/">http://www.utrgv.edu/sair/</ext-link>
                                                </comment>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Garett</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Lord</surname>
              <given-names>LR</given-names>
            </name>
            <name name-style="western">
              <surname>Young</surname>
              <given-names>SD</given-names>
            </name>
          </person-group>
          <article-title>Associations between social media and cyberbullying: a review of the literature</article-title>
          <source>Mhealth</source>
          <year>2016</year>
          <month>12</month>
          <day>19</day>
          <volume>2</volume>
          <fpage>46</fpage>
          <pub-id pub-id-type="doi">10.21037/mhealth.2016.12.01</pub-id>
          <pub-id pub-id-type="medline">28293616</pub-id>
          <pub-id pub-id-type="pii">mh-02-2016.12.01</pub-id>
          <pub-id pub-id-type="pmcid">PMC5344141</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Blei</surname>
              <given-names>DM</given-names>
            </name>
            <name name-style="western">
              <surname>Ng</surname>
              <given-names>AY</given-names>
            </name>
            <name name-style="western">
              <surname>Jordan</surname>
              <given-names>MI</given-names>
            </name>
          </person-group>
          <article-title>Latent Dirichlet allocation</article-title>
          <source>J Mach Learn Res</source>
          <year>2003</year>
          <month>01</month>
          <volume>3</volume>
          <fpage>993</fpage>
          <lpage>1022</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.jmlr.org/papers/volume3/blei03a/blei03a.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Breiman</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Random forests</article-title>
          <source>Mach Learn</source>
          <year>2001</year>
          <month>10</month>
          <day>1</day>
          <volume>45</volume>
          <issue>1</issue>
          <fpage>5</fpage>
          <lpage>32</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://link.springer.com/content/pdf/10.1023/A:1010933404324.pdf"/>
          </comment>
          <pub-id pub-id-type="doi">10.1023/A:1010933404324</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cortes</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Vapnik</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>Support-vector networks</article-title>
          <source>Mach Learn</source>
          <year>1995</year>
          <month>09</month>
          <volume>20</volume>
          <issue>3</issue>
          <fpage>273</fpage>
          <lpage>97</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://link.springer.com/article/10.1007/BF00994018"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/bf00994018</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Manning</surname>
              <given-names>CD</given-names>
            </name>
            <name name-style="western">
              <surname>Raghavan</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Schütze</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Scoring, term weighting and the vector space model</article-title>
          <source>Introduction to Information Retrieval</source>
          <year>2008</year>
          <publisher-loc>Cambridge, England</publisher-loc>
          <publisher-name>Cambridge University Press</publisher-name>
          <fpage>109</fpage>
          <lpage>33</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Convolutional Neural Networks for Sentence Classification</article-title>
          <source>Proceedings of the 2014 Conference on Empirical Methods in Natural Language Processing</source>
          <year>2014</year>
          <conf-name>EMNLP'14</conf-name>
          <conf-date>October 25-29, 2014</conf-date>
          <conf-loc>Doha, Qatar</conf-loc>
          <fpage>1746</fpage>
          <lpage>51</lpage>
          <pub-id pub-id-type="doi">10.3115/v1/d14-1181</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pennington</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Socher</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Manning</surname>
              <given-names>CD</given-names>
            </name>
          </person-group>
          <article-title>GloVe: Global Vectors for Word Representation</article-title>
          <source>Proceedings of the 2014 Conference on Empirical Methods in Natural Language Processing</source>
          <year>2014</year>
          <conf-name>EMNLP'14</conf-name>
          <conf-date>October 25-29, 2014</conf-date>
          <conf-loc>Doha, Qatar</conf-loc>
          <fpage>1532</fpage>
          <lpage>43</lpage>
          <pub-id pub-id-type="doi">10.3115/v1/d14-1162</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Brodersen</surname>
              <given-names>KH</given-names>
            </name>
            <name name-style="western">
              <surname>Ong</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Stephan</surname>
              <given-names>KE</given-names>
            </name>
            <name name-style="western">
              <surname>Buhmann</surname>
              <given-names>JM</given-names>
            </name>
          </person-group>
          <article-title>The Balanced Accuracy and Its Posterior Distribution</article-title>
          <source>Proceedings of the 20th International Conference on Pattern Recognition</source>
          <year>2010</year>
          <conf-name>ICPR'10</conf-name>
          <conf-date>August 23-26, 2010</conf-date>
          <conf-loc>Istanbul, Turkey</conf-loc>
          <pub-id pub-id-type="doi">10.1109/icpr.2010.764</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bird</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Klein</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Loper</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <source>Natural Language Processing with Python</source>
          <year>2020</year>
          <publisher-loc>Sebastopol, CA</publisher-loc>
          <publisher-name>O'Reilly Media</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pedregosa</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Varoquaux</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Gramfort</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Michel</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Thirion</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Grisel</surname>
              <given-names>O</given-names>
            </name>
          </person-group>
          <article-title>Scikit-learn: machine learning in Python</article-title>
          <source>J Mach Learn Res</source>
          <year>2011</year>
          <month>10</month>
          <day>1</day>
          <volume>12</volume>
          <issue>85</issue>
          <fpage>2825</fpage>
          <lpage>30</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.jmlr.org/papers/volume12/pedregosa11a/pedregosa11a.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ng</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <source>GitHub</source>
          <year>2018</year>
          <month>12</month>
          <day>27</day>
          <access-date>2019-03-18</access-date>
          <comment>CNNs for Sentence Classification in PyTorch<ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://github.com/Shawn1993/cnn-text-classification-pytorch">https://github.com/Shawn1993/cnn-text-classification-pytorch</ext-link>
                                                </comment>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Benbow</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Kirkpatrick</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Gupta</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Villamar</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chernyshov</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Cramer</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Mena</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Mayer</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Young</surname>
              <given-names>SD</given-names>
            </name>
          </person-group>
          <article-title>An iterative process of integrating and developing big data modeling and visualization tools in collaboration with public health officials</article-title>
          <source>Sage Research Methods Cases: Medicine and Health</source>
          <year>2020</year>
          <publisher-loc>Thousand Oaks, CA</publisher-loc>
          <publisher-name>Sage Publications</publisher-name>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
