<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMIR</journal-id>
      <journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id>
      <journal-title>Journal of Medical Internet Research</journal-title>
      <issn pub-type="epub">1438-8871</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v25i1e45249</article-id>
      <article-id pub-id-type="pmid">37079359</article-id>
      <article-id pub-id-type="doi">10.2196/45249</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Disruptions in the Cystic Fibrosis Community’s Experiences and Concerns During the COVID-19 Pandemic: Topic Modeling and Time Series Analysis of Reddit Comments</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Mavragani</surname>
            <given-names>Amaryllis</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Timakum</surname>
            <given-names>Tatsawan</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Stracqualursi</surname>
            <given-names>Luisa</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Yao</surname>
            <given-names>Lean Franzl</given-names>
          </name>
          <degrees>MAppMath</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-3184-9368</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Ferawati</surname>
            <given-names>Kiki</given-names>
          </name>
          <degrees>MStat</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-0717-0769</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Liew</surname>
            <given-names>Kongmeng</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-0755-7173</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Wakamiya</surname>
            <given-names>Shoko</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-9371-1340</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Aramaki</surname>
            <given-names>Eiji</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Social Computing Laboratory</institution>
            <institution>Nara Institute of Science and Technology</institution>
            <addr-line>Information Science Building A, 6th Floor</addr-line>
            <addr-line>8916-5 Takayama-cho, Nara Prefecture</addr-line>
            <addr-line>Ikoma, 630-0192</addr-line>
            <country>Japan</country>
            <phone>81 743 72 5250</phone>
            <email>aramaki@is.naist.jp</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-0201-3609</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Social Computing Laboratory</institution>
        <institution>Nara Institute of Science and Technology</institution>
        <addr-line>Ikoma</addr-line>
        <country>Japan</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Eiji Aramaki <email>aramaki@is.naist.jp</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <year>2023</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>20</day>
        <month>4</month>
        <year>2023</year>
      </pub-date>
      <volume>25</volume>
      <elocation-id>e45249</elocation-id>
      <history>
        <date date-type="received">
          <day>30</day>
          <month>12</month>
          <year>2022</year>
        </date>
        <date date-type="rev-request">
          <day>26</day>
          <month>1</month>
          <year>2023</year>
        </date>
        <date date-type="rev-recd">
          <day>15</day>
          <month>3</month>
          <year>2023</year>
        </date>
        <date date-type="accepted">
          <day>16</day>
          <month>3</month>
          <year>2023</year>
        </date>
      </history>
      <copyright-statement>©Lean Franzl Yao, Kiki Ferawati, Kongmeng Liew, Shoko Wakamiya, Eiji Aramaki. Originally published in the Journal of Medical Internet Research (https://www.jmir.org), 20.04.2023.</copyright-statement>
      <copyright-year>2023</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research, is properly cited. The complete bibliographic information, a link to the original publication on https://www.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://www.jmir.org/2023/1/e45249" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>The COVID-19 pandemic disrupted the needs and concerns of the cystic fibrosis community. Patients with cystic fibrosis were particularly vulnerable during the pandemic due to overlapping symptoms in addition to the challenges patients with rare diseases face, such as the need for constant medical aid and limited information regarding their disease or treatments. Even before the pandemic, patients vocalized these concerns on social media platforms like Reddit and formed communities and networks to share insight and information. This data can be used as a quick and efficient source of information about the experiences and concerns of patients with cystic fibrosis in contrast to traditional survey- or clinical-based methods.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>This study applies topic modeling and time series analysis to identify the disruption caused by the COVID-19 pandemic and its impact on the cystic fibrosis community’s experiences and concerns. This study illustrates the utility of social media data in gaining insight into the experiences and concerns of patients with rare diseases.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>We collected comments from the subreddit r/CysticFibrosis to represent the experiences and concerns of the cystic fibrosis community. The comments were preprocessed before being used to train the BERTopic model to assign each comment to a topic. The number of comments and active users for each data set was aggregated monthly per topic and then fitted with an autoregressive integrated moving average (ARIMA) model to study the trends in activity. To verify the disruption in trends during the COVID-19 pandemic, we assigned a dummy variable in the model where a value of “1” was assigned to months in 2020 and “0” otherwise and tested for its statistical significance.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>A total of 120,738 comments from 5827 users were collected from March 24, 2011, until August 31, 2022. We found 22 topics representing the cystic fibrosis community’s experiences and concerns. Our time series analysis showed that for 9 topics, the COVID-19 pandemic was a statistically significant event that disrupted the trends in user activity. Of the 9 topics, only 1 showed significantly increased activity during this period, while the other 8 showed decreased activity. This mixture of increased and decreased activity for these topics indicates a shift in attention or focus on discussion topics during this period.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>There was a disruption in the experiences and concerns the cystic fibrosis community faced during the COVID-19 pandemic. By studying social media data, we were able to quickly and efficiently study the impact on the lived experiences and daily struggles of patients with cystic fibrosis. This study shows how social media data can be used as an alternative source of information to gain insight into the needs of patients with rare diseases and how external factors disrupt them.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>COVID-19</kwd>
        <kwd>Reddit</kwd>
        <kwd>time series analysis</kwd>
        <kwd>BERTopic</kwd>
        <kwd>topic modeling</kwd>
        <kwd>cystic fibrosis</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>The COVID-19 pandemic was the first global pandemic since the invention of Facebook [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref2">2</xref>]. As lockdowns and quarantine protocols were put in place, the use of social media to spread information rose, providing an abundance of social media data and even leading to an infodemic [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref4">4</xref>]. Needless to say, people’s day-to-day lives drastically changed during this period in dealing with the threat and risks posed by COVID-19, and all of these were recorded in social media [<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref6">6</xref>].</p>
      <p>Compared to the general population, patients with cystic fibrosis were also faced with the risk of overlapping symptoms with COVID-19 in addition to the challenges already faced by patients with rare diseases that preceded the global pandemic [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref8">8</xref>]. Cystic fibrosis is a condition caused by a mutation in a gene that affects a person’s production of mucus and sweat and commonly causes pulmonary or respiratory problems; depending on the types of mutations, there can be different symptoms and treatment options [<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref10">10</xref>]. It is possible that some measures already taken by the cystic fibrosis community help mitigate the risk imposed by COVID-19, such as ongoing treatments or the added caution that comes with living with cystic fibrosis [<xref ref-type="bibr" rid="ref11">11</xref>]. However, health measures and protocols raised to deal with the COVID-19 pandemic also disrupted clinic visits, medication, and therapy, among others, in addition to the challenges faced by the general population.</p>
      <p>For the cystic fibrosis community and also other rare disease communities, social media has been a source of information and support even before the global pandemic [<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref13">13</xref>]. Researchers have even used this available information to gain insight into patients and communities with rare diseases [<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref15">15</xref>]. Combined with natural language processing techniques, natural dialogue on social media platforms can provide useful insight into patients’ daily lives. Reddit, for example, is one platform used by patients with rare diseases, which is also used in medical research. Foufi et al [<xref ref-type="bibr" rid="ref16">16</xref>] analyzed chronic diseases using extracted entities and relations from Reddit discussions. Leung et al [<xref ref-type="bibr" rid="ref17">17</xref>] explored COVID-19–related stressors by applying topic modeling to Reddit data. In Zhu et al’s [<xref ref-type="bibr" rid="ref18">18</xref>] study, they illustrated how we could gain insight into the needs of patients with rare diseases by analyzing social media data. They found popular rare disease subreddits and did a case study on the subreddit r/CysticFibrosis. Topic modeling has also been used in other data sets to gain insight into patients’ needs [<xref ref-type="bibr" rid="ref19">19</xref>,<xref ref-type="bibr" rid="ref20">20</xref>].</p>
      <p>Studying social media data of communities with rare diseases provides a ground-up level point of view from patients’ personal experiences and concerns that may not otherwise be shared with medical care providers. Using social media data also has the added benefit of speed since collecting data does not require additional time from patients. Because the use of social media platforms by these communities predates the pandemic, the available data can provide information about the disruptions caused by the pandemic in these communities. In this study, we propose to look at the disruption caused by COVID-19 on the discussion topics of patients with cystic fibrosis through the use of time series analysis and topic models.</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Data Collection and Preprocessing</title>
        <p>We collected comments from the subreddit r/CysticFibrosis to represent the experiences and concerns of the cystic fibrosis community. On August 31, 2022, we collected all Reddit comments starting from the inception date of March 24, 2011, using the Pushshift Reddit application programming interface through Python (version 3.9.12; Python Software Foundation). The Pushshift Reddit application programming interface offers a convenient way to collect text data for all comments and submissions in a chosen subreddit. We made this data set, as well as the script we used to collect the data, available on our GitHub repository [<xref ref-type="bibr" rid="ref21">21</xref>].</p>
        <p>We checked for duplicate comments and converted everything to lowercase. We also removed links, tags, and mentions of other users. At this stage, we did not perform any further data cleaning to maintain the natural structure of the comments since the BERTopic library was developed with natural text and has its own way of dealing with noise and outliers.</p>
      </sec>
      <sec>
        <title>Topic Modeling</title>
        <p>BERTopic is a topic modeling technique that uses state-of-the-art language models and applies a class-based term frequency-inverse document frequency procedure, which calculates how relevant a word is to a class of documents, for generating topics [<xref ref-type="bibr" rid="ref22">22</xref>]. We trained a BERTopic model on the Reddit comments using the library bertopic (version 0.12.0 [<xref ref-type="bibr" rid="ref22">22</xref>]). To keep our results consistent and reproducible with each iteration, we set the random state parameter of the Uniform Manifold Approximation and Projection model to 42 using the library umap (version 0.0.1 [<xref ref-type="bibr" rid="ref23">23</xref>]). With these parameter specifications, BERTopic produced 824 topic categories: 823 usable topics and a topic for outliers, topic “–1.” Additional cleaning was done to remove stopwords by defining a vectorizer with sklearn (version 3.7 [<xref ref-type="bibr" rid="ref24">24</xref>]) and setting the stopwords using the list from spacy (version 3.3.1 [<xref ref-type="bibr" rid="ref25">25</xref>]). We also included “im,” “like,” “use,” “ive,” and “ill” to the list of stopwords before finally reducing the number of topics further.</p>
        <p>We set BERTopic to reduce the number of topics from 823 to 30 and noticed that most of the comments from the dropped topics were classified as outliers rather than belonging to one of the 30 topics. Instructing BERTopic to produce 30 topics from the beginning similarly resulted in more outliers. Thus, in order to mitigate the loss of data and obtain more general topics, we set BERTopic to reduce the number of topics to 30 and then used hierarchical clustering to manually merge topics based on topic similarity metrics.</p>
      </sec>
      <sec>
        <title>Time Series Analysis</title>
        <sec>
          <title>Overview</title>
          <p>To check the significance of an event on time series data, we used tools available in time series analysis. We used R (version 4.2.0; R Core Team) and the packages <italic>tidyverse</italic>, <italic>tseries</italic>, <italic>TSA</italic>, and <italic>lmtest</italic> for the time series analysis.</p>
          <p>To determine whether COVID-19 was a significant predictor of the number of monthly comments, we prepared data sets that contain the number of monthly comments and the number of monthly active users. We prepared data sets to look at the overall activity and also at the individual topics. We took a denoised aggregate subset, that is, disregarding topic –1, to represent the overall activity. Likewise, for each topic, we took the subset of comments that belong to it. We then fit an autoregressive integrated moving average (ARIMA) model on each of these data sets: the denoised aggregate data set and the data sets for each topic. In addition, we also decided to check the significance of COVID-19 on the number of active users per month.</p>
        </sec>
        <sec>
          <title>ARIMA(p,d,q) Model With Exogenous Variables</title>
          <p>We fitted an ARIMA(<italic>p</italic>,<italic>d</italic>,<italic>q</italic>) model for each data set [<xref ref-type="bibr" rid="ref26">26</xref>]. We created a dummy variable to indicate the period of the COVID-19 pandemic. A value of 1 was assigned for the months of January to December 2020, while the months of March 2011 to December 2019 and January 2021 to August 2022 were assigned a value of 0. We chose this start date of the pandemic period based on the month when the first case of COVID-19 was recorded and decided the end date to be the end of 2020 as vaccines and relaxations to health protocols were starting [<xref ref-type="bibr" rid="ref27">27</xref>]. To account for the influence of the number of active users, we incorporated it as an exogenous variable in the model. This was to ensure that we were isolating the effects of each variable, particularly that of the period of COVID-19.</p>
          <p>An ARIMA(<italic>p</italic>,<italic>d</italic>,<italic>q</italic>) model takes 3 parameters: the order of the autoregressive (AR) process (<italic>p</italic>), the number of times to difference the data in order to make it stationary (<italic>d</italic>), and the order of the moving average (MA) process (<italic>q</italic>). Stationarity is an assumption regarding the structure of a stochastic process like time series data; saying that a process is stationary is similar to saying that the behavior of the system governing the process does not change over time. In many cases, differencing the data, that is, taking the difference between the values at successive timesteps, is enough to satisfy this assumption. An AR process describes a series wherein the value at a timestep can be described by its previous values (lagged values), while an MA process describes a series wherein the value at a timestep can be described by the lagged residual errors. Fitting an ARIMA model requires checking the stationarity of the data, determining whether it follows an AR, MA, or mixed ARMA process, and then deciding on the order.</p>
          <p>We use the graph of the auto-correlation function (ACF) and the Augmented Dickey-Fuller unit root test to check for stationarity and determine the number of differences needed to make the data stationary. We then graph the ACF and partial auto-correlation function (PACF) together to see whether we would proceed to use an AR, MA, or a mixed ARMA. Although it was not needed in this study, we mention here that the order for an ARMA process is decided based on the extended auto-correlation function. <xref ref-type="table" rid="table1">Table 1</xref> shows how the orders are determined based on the observed behaviors in the ACF and PACF. We also took a conservative stand in deciding the order, opting for lower orders unless the graph compellingly indicated a higher order.</p>
          <table-wrap position="float" id="table1">
            <label>Table 1</label>
            <caption>
              <p>Behavior of auto-correlation function (ACF) and partial auto-correlation function (PACF) for autoregressive (AR), moving average (MA), and ARMA processes.</p>
            </caption>
            <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
              <col width="100"/>
              <col width="300"/>
              <col width="300"/>
              <col width="300"/>
              <thead>
                <tr valign="top">
                  <td/>
                  <td>AR (order <italic>p</italic>)</td>
                  <td>MA (order <italic>q</italic>)</td>
                  <td>ARMA</td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td>ACF</td>
                  <td>Tails off (decays to zero)</td>
                  <td>Cuts off after lag <italic>q</italic></td>
                  <td>Tails off</td>
                </tr>
                <tr valign="top">
                  <td>PACF</td>
                  <td>Cuts off after lag <italic>p</italic></td>
                  <td>Tails off (decays to zero)</td>
                  <td>Tails off</td>
                </tr>
              </tbody>
            </table>
          </table-wrap>
          <p>In time series models, incorporating another time series as an exogenous variable requires additional steps. We first look at the cross-correlation of the prewhitened time series of the number of active users and the number of comments to see which lags are correlated, and then decide on the transfer function to be used to add the exogenous variable to the ARIMA model. We prewhiten the time series data in order to remove linear trends and autocorrelation within each time series to avoid misleading cross-correlations. The cross-correlations then tell us which lags in the time series for the number of users are correlated with the time series for the number of comments.</p>
        </sec>
        <sec>
          <title>Significance Testing</title>
          <p>Once a model has been chosen and fitted to the time series data, we test for the significance of the dummy variable included in the model. The test for significance tells us whether there was a statistically significant change in trend during the COVID-19 period, and the sign of the coefficient tells us whether the effect was an increase or decrease in the number of comments.</p>
        </sec>
      </sec>
      <sec>
        <title>Ethical Considerations</title>
        <p>This study did not require participants to be involved in any physical or mental intervention. As this research did not use personally identifiable information, it was exempt from institutional review board approval in accordance with the Ethical Guidelines for Medical and Health Research Involving Human Subjects stipulated by the Japanese national government.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Collected Data</title>
        <p>A total of 120,738 comments from 5827 unique user IDs were collected, with dates ranging from inception on March 24, 2011, until August 31, 2022. <xref rid="figure1" ref-type="fig">Figure 1</xref> shows the number of comments and the number of users per month. <xref ref-type="table" rid="table2">Table 2</xref> contains sample comments and their assigned topics.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Timeline of monthly active users (A) and monthly comments (B) from March 24, 2011, to August 31, 2022. A red dotted line indicates the date when the World Health Organization declared COVID-19 a public health emergency of international concern. Both timelines show similar trends over the time period. There was a notable surge in active users and the number of comments at the start of 2018, but then it slowed down and started to decrease at the beginning of COVID-19.</p>
          </caption>
          <graphic xlink:href="jmir_v25i1e45249_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Sample comments and their assigned topics.</p>
          </caption>
          <table border="1" rules="groups" cellpadding="5" frame="hsides" width="1000" cellspacing="0">
            <col width="700"/>
            <col width="300"/>
            <thead>
              <tr valign="top">
                <td>Comment</td>
                <td>Topic</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Better than in USA. Here we need to renew Trikafta pre-authorization every 6 months. And if you lose your job and private insurance coverage, you’re screwed!</td>
                <td>Trikafta and side effects</td>
              </tr>
              <tr valign="top">
                <td>Those results sound amazing!! I’m so excited to try this now! Thank you!!</td>
                <td>Gratitude</td>
              </tr>
              <tr valign="top">
                <td>You’re going to get a whole spectrum of people with different levels of acceptable risk. Some people refuse to ever be in the same room as another CF’er while others sleep together. But yes, I’d agree his handling of this is concerning. Like, what does the world need to look like for him to decide its safe enough to go out again?</td>
                <td>Social life</td>
              </tr>
              <tr valign="top">
                <td>Mine had to be done on my upper arm where there was no hair to interfere with the collection.<break/>My son’s failed to collect any sweat, it’s very common.</td>
                <td>Sweat testing and mutations</td>
              </tr>
              <tr valign="top">
                <td>You can get it at a pot shop. You can use it as an edible too. I was told if you put a couple of pumps in a drink with a lot of fat (hot chocolate made with milk, milkshakes) it works a lot faster. I take it before bed. It helps with my anxiety and the muscle pain in my neck and back.</td>
                <td>Marijuana</td>
              </tr>
              <tr valign="top">
                <td>I dont think I have I just a nose spray that makes me be able to breathe easier with my nose</td>
                <td>Sinuses and breathing</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
      <sec>
        <title>Identified Topics</title>
        <p>The initial 823 topics were too many for us to analyze. We specified our BERTopic model to reduce the number of topics to 30 instead, as a more manageable starting point. <xref rid="figure2" ref-type="fig">Figure 2</xref> shows the top 8 topics and the words with the highest class-based term frequency-inverse document frequency scores representative of each topic, and <xref rid="figure3" ref-type="fig">Figure 3</xref> shows the intertopical distance map between topics that can be used to create more general topics.</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>The top 5 representation words based on class-based term frequency-inverse document frequency (c-TF-IDF) scores for the top 8 topics. The c-TF-IDF score represents how relevant a word is in representing the topic, with a higher score meaning that the word is more relevant. In topic 0, the word “trikafta” has a high score compared to the other top words, meaning that the topic representation relies on the presence of the word “trikafta” the most, while other topics have a more equal weighted representation, like topic 7.</p>
          </caption>
          <graphic xlink:href="jmir_v25i1e45249_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>A 2D graph of the intertopical distance map using the embedded class-based term frequency-inverse document frequency (c-TF-IDF) representations of the topic. The size of the circles represents how many documents were classified into that topic, and the x- and y-axes have no meaning or interpretation, but the distance between topics represents the similarity between them, where more similar topics are closer to each other. This plot can give an idea of how topics can be clustered together to form more general topics.</p>
          </caption>
          <graphic xlink:href="jmir_v25i1e45249_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>A dendrogram graph was generated to visualize the hierarchical clustering of the 30 topics to see which ones were similar enough to be merged (<xref rid="figure4" ref-type="fig">Figure 4</xref>). The clustering was based on the cosine distance matrix of the topic embeddings with default parameters in BERTopic. Changing these parameters will result in different, more, or fewer cluster suggestions, so we merely use this result as a guide to manual merging.</p>
        <fig id="figure4" position="float">
          <label>Figure 4</label>
          <caption>
            <p>BERTopic hierarchical clustering. The hierarchical clustering shows how topic embeddings can be merged at varying cosine distances. The figure also provides recommended clusters to merge and shows the topics in the same color, where blue is the default color and is not treated as a cluster.</p>
          </caption>
          <graphic xlink:href="jmir_v25i1e45249_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>In each cluster, we merged similar topics based on our judgment and understanding of what these topics discussed. We treated the dendrogram from BERTopic as recommendations on which topics we could look into for merging. From these recommended clusters, we looked at representative comments and also the top 10 representative words for the topic to make our decisions regarding merging. In addition, we looked into symptoms, treatments, life expectancy, comorbidities, and the general lives of patients with cystic fibrosis in order to name each of the merged topics. We decided to merge the following sets of topics: {29, 2, 26}, {28, 10, 4}, {15, 19}, {8, 25}, and {5, 24, 12}. In contrast, we chose not to merge the set {3, 21} since one appears to talk about financing while the other was about some type of medication; similarly, we excluded 27 from {8, 25} since the keywords for topic 27 imply that it talks about children and hereditariness, while 8 and 25 talk about mutations; likewise, topics 11 and 1 both have keywords that talk about medical care and life in general; and lastly, topic 0 was not included in {5, 24, 12} because it looks like topic 0 talks about a specific drug, Trikafta, and the liver, while the others talked about things related to respiration or the respiratory tract. After this step, we were left with 42,060 comments categorized into 22 topics. In <xref ref-type="table" rid="table3">Table 3</xref>, we summarize these topics, their top 10 representative words, and how they were merged relative to the original 30 topics.</p>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Top 10 representative words for merged topics based on the class-based term frequency-inverse document frequency scores.</p>
          </caption>
          <table border="1" rules="groups" cellpadding="5" frame="hsides" width="1000" cellspacing="0">
            <col width="300"/>
            <col width="500"/>
            <col width="200"/>
            <thead>
              <tr valign="top">
                <td>Merged topics</td>
                <td>Top 10 representative words</td>
                <td>Original topics</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Trikafta and side effects</td>
                <td>trikafta, liver, started, dose, dont, taking, effects, drug, feel, day</td>
                <td>0</td>
              </tr>
              <tr valign="top">
                <td>Gratitude</td>
                <td>thank, thanks, thats, good, awesome, great, luck, oh, hear, congrats</td>
                <td>2, 26, 29</td>
              </tr>
              <tr valign="top">
                <td>Social life</td>
                <td>life, people, dont, know, time, think, things, feel, want, going</td>
                <td>1</td>
              </tr>
              <tr valign="top">
                <td>Creon medication and diet</td>
                <td>weight, eat, creon, enzymes, eating, food, fat, calories, diet, gain</td>
                <td>4, 10, 28</td>
              </tr>
              <tr valign="top">
                <td>Lung transplants and respiration</td>
                <td>transplant, lung, cough, lungs, coughing, blood, function, time, mucus, dont</td>
                <td>5, 12, 24</td>
              </tr>
              <tr valign="top">
                <td>Vertex treatment and financing</td>
                <td>insurance, vertex, pay, drug, copay, price, cost, drugs, health care, company</td>
                <td>3</td>
              </tr>
              <tr valign="top">
                <td>Sweat testing and mutations</td>
                <td>test, sweat, mutations, mutation, genetic, symptoms, rare, testing, gene, diagnosed</td>
                <td>8, 25</td>
              </tr>
              <tr valign="top">
                <td>FEV<sub>1</sub><sup>a</sup> marker for CF<sup>b</sup></td>
                <td>fev1, running, run, exercise, cardio, good, bike, time, gym, dont</td>
                <td>15, 19</td>
              </tr>
              <tr valign="top">
                <td>Marijuana</td>
                <td>smoking, smoke, edibles, weed, cannabis, thc, marijuana, vaping, cbd, smoked</td>
                <td>6</td>
              </tr>
              <tr valign="top">
                <td>Nebulizers</td>
                <td>water, nebs, pari, sterilizer, eflow, bottle, neb, nebulizer, compressor, baby</td>
                <td>7</td>
              </tr>
              <tr valign="top">
                <td>Dairy and calories</td>
                <td>butter, milk, fat, cheese, peanut, cream, eat, calories, protein, food</td>
                <td>9</td>
              </tr>
              <tr valign="top">
                <td>Medical facilities and professionals</td>
                <td>clinic, hospital, care, dont, doctor, know, nurses, nurse, time, team</td>
                <td>11</td>
              </tr>
              <tr valign="top">
                <td>Tobramycin side effects and alternatives</td>
                <td>tobi, cayston, hearing, podhaler, loss, voice, colistin, tobramycin, month, inhaled</td>
                <td>13</td>
              </tr>
              <tr valign="top">
                <td>Diabetes</td>
                <td>insulin, sugar, cfrd, diabetes, blood, sugars, glucose, pump, diabetic, diet</td>
                <td>14</td>
              </tr>
              <tr valign="top">
                <td>Sinuses and breathing</td>
                <td>sinus, smell, sinuses, surgery, nose, rinses, nasal, ent, polyps, surgeries</td>
                <td>16</td>
              </tr>
              <tr valign="top">
                <td>IV<sup>c</sup> access port</td>
                <td>port, picc, arm, line, veins, lines, piccs, ivs, dressing, iv</td>
                <td>17</td>
              </tr>
              <tr valign="top">
                <td>Air quality</td>
                <td>air, filter, filters, hepa, live, purifier, house, humidity, ac, purifiers</td>
                <td>18</td>
              </tr>
              <tr valign="top">
                <td>Birthdays</td>
                <td>birthday, happy, old, day, ich, year, und, mit, years, months</td>
                <td>20</td>
              </tr>
              <tr valign="top">
                <td>Orkambi medication and alternatives</td>
                <td>orkambi, symdeko, effects, started, better, kalydeco, didnt, taking, function, year</td>
                <td>21</td>
              </tr>
              <tr valign="top">
                <td>Masks</td>
                <td>mask, masks, wear, wearing, n95, surgical, people, cambridge, protect, vogmask</td>
                <td>22</td>
              </tr>
              <tr valign="top">
                <td>Airway clearance vests</td>
                <td>vest, monarch, pep, vests, effective, clearance, flutter, aerobika, acapella, hillrom</td>
                <td>23</td>
              </tr>
              <tr valign="top">
                <td>CF inheritance and family planning</td>
                <td>carrier, child, children, kids, chance, pregnant, carriers, pregnancy, birth, baby</td>
                <td>27</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table3fn1">
              <p><sup>a</sup>FEV<sub>1</sub>: forced expiratory volume in 1 second.</p>
            </fn>
            <fn id="table3fn2">
              <p><sup>b</sup>CF: cystic fibrosis.</p>
            </fn>
            <fn id="table3fn3">
              <p><sup>c</sup>IV: intravenous.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <p>While it was not our main focus of the analysis, we provided a simple visualization of the evolution of topics within our time period using dynamic topic modeling. <xref rid="figure5" ref-type="fig">Figure 5</xref> shows the time series of the top 5 topics. From the graph, we can see that there was little activity in the early years of the subreddit, and most topics seemed to be fairly uniform until around 2020 when more variations between topics became visible.</p>
        <fig id="figure5" position="float">
          <label>Figure 5</label>
          <caption>
            <p>Timeline for the monthly comments per topic for the top 5 topics. A red dotted line indicates the date when the World Health Organization declared COVID-19 a public health emergency of international concern. There was also a surge in the number of comments at the start of 2018, but it slowed down and started to decrease at the beginning of the COVID-19 pandemic in 2020.</p>
          </caption>
          <graphic xlink:href="jmir_v25i1e45249_fig5.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Time Series Analysis</title>
        <p>We first checked the cross-correlation of the prewhitened series for the number of active users and the number of comments and noticed that only lag 0 was significant. This means that both time series are correlated within the same time period rather than timesteps ahead of or behind each other. To simplify the inclusion of the number of active users as a regressor, we add it to the regression model in the same way a regressor would be added to a linear regression model without the use of a transfer function.</p>
        <p>The graphs of the ACFs showed that the series for each data set were nonstationary. We determined that 1 order of differencing was enough to make each series stationary and confirmed this with the Augmented Dickey-Fuller unit root test. Observing the behaviors of their PACFs, which cut off after a certain lag, in addition to their ACFs decaying to zero (see <xref ref-type="table" rid="table1">Table 1</xref>), leads us to conclude that each of the series follows an AR process. We determine the orders for the AR process by the lag where the PACF cuts off (see <xref ref-type="table" rid="table4">Table 4</xref>). The order of the AR process can be interpreted as how many lags are predictive of the current timestep; for example, an AR(3) process implies that the observation at the current timestep can be described using observations from the past 3 timesteps. We also note here that for the series representing the topic “IV access port,” the cutting-off behavior of the PACF was not as apparent as in the other series, but we still opted to fit an AR process for simplicity.</p>
        <table-wrap position="float" id="table4">
          <label>Table 4</label>
          <caption>
            <p>Data sets, time series model specifications, and significance of the COVID-19 period.</p>
          </caption>
          <table border="1" rules="groups" cellpadding="5" frame="hsides" width="1000" cellspacing="0">
            <col width="300"/>
            <col width="100"/>
            <col width="200"/>
            <col width="300"/>
            <col width="100"/>
            <thead>
              <tr valign="bottom">
                <td>Data set</td>
                <td>Comments</td>
                <td>ARIMA<sup>a</sup> order</td>
                <td>Coefficient of COVID-19 variable</td>
                <td><italic>P</italic> value</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Aggregate data</td>
                <td>42,060</td>
                <td>(1,1,0)</td>
                <td>–157.185</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td>Trikafta and side effects</td>
                <td>4705</td>
                <td>(1,1,0)</td>
                <td>–18.765</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td>Gratitude</td>
                <td>4240</td>
                <td>(1,1,0)</td>
                <td>–9.980</td>
                <td>.004</td>
              </tr>
              <tr valign="top">
                <td>Social life</td>
                <td>4008</td>
                <td>(1,1,0)</td>
                <td>4.093</td>
                <td>.10</td>
              </tr>
              <tr valign="top">
                <td>Creon medication and diet</td>
                <td>3876</td>
                <td>(2,1,0)</td>
                <td>3.371</td>
                <td>.34</td>
              </tr>
              <tr valign="top">
                <td>Lung transplants and respiration</td>
                <td>3654</td>
                <td>(1,1,0)</td>
                <td>–6.566</td>
                <td>.01</td>
              </tr>
              <tr valign="top">
                <td>Vertex treatment and financing</td>
                <td>2370</td>
                <td>(1,1,0)</td>
                <td>–5.087</td>
                <td>.11</td>
              </tr>
              <tr valign="top">
                <td>Sweat testing and mutations</td>
                <td>2118</td>
                <td>(1,1,0)</td>
                <td>–5.468</td>
                <td>.006</td>
              </tr>
              <tr valign="top">
                <td>FEV<sub>1</sub><sup>b</sup> marker for CF<sup>c</sup></td>
                <td>1952</td>
                <td>(1,1,0)</td>
                <td>0.332</td>
                <td>.91</td>
              </tr>
              <tr valign="top">
                <td>Marijuana</td>
                <td>1551</td>
                <td>(1,1,0)</td>
                <td>4.808</td>
                <td>.039</td>
              </tr>
              <tr valign="top">
                <td>Nebulizers</td>
                <td>1487</td>
                <td>(2,1,0)</td>
                <td>–0.937</td>
                <td>.61</td>
              </tr>
              <tr valign="top">
                <td>Dairy and calories</td>
                <td>1302</td>
                <td>(4,1,0)</td>
                <td>0.365</td>
                <td>.58</td>
              </tr>
              <tr valign="top">
                <td>Medical facilities and professionals</td>
                <td>1299</td>
                <td>(2,1,0)</td>
                <td>–2.543</td>
                <td>.003</td>
              </tr>
              <tr valign="top">
                <td>Tobramycin side effects and alternatives</td>
                <td>1187</td>
                <td>(3,1,0)</td>
                <td>–3.641</td>
                <td>.003</td>
              </tr>
              <tr valign="top">
                <td>Diabetes</td>
                <td>1061</td>
                <td>(4,1,0)</td>
                <td>–0.493</td>
                <td>.67</td>
              </tr>
              <tr valign="top">
                <td>Sinuses and breathing</td>
                <td>1037</td>
                <td>(2,1,0)</td>
                <td>0.563</td>
                <td>.53</td>
              </tr>
              <tr valign="top">
                <td>IV<sup>d</sup> access port</td>
                <td>1036</td>
                <td>(1,1,0)</td>
                <td>1.809</td>
                <td>.30</td>
              </tr>
              <tr valign="top">
                <td>Air quality</td>
                <td>1001</td>
                <td>(2,1,0)</td>
                <td>–1.243</td>
                <td>.15</td>
              </tr>
              <tr valign="top">
                <td>Birthdays</td>
                <td>899</td>
                <td>(2,1,0)</td>
                <td>–1.264</td>
                <td>.09</td>
              </tr>
              <tr valign="top">
                <td>Orkambi medication and alternatives</td>
                <td>840</td>
                <td>(1,1,0)</td>
                <td>–1.247</td>
                <td>.39</td>
              </tr>
              <tr valign="top">
                <td>Masks</td>
                <td>828</td>
                <td>(1,1,0)</td>
                <td>0.368</td>
                <td>.78</td>
              </tr>
              <tr valign="top">
                <td>Airway clearance vests</td>
                <td>819</td>
                <td>(2,1,0)</td>
                <td>1.239</td>
                <td>.12</td>
              </tr>
              <tr valign="top">
                <td>CF inheritance and family planning</td>
                <td>790</td>
                <td>(1,1,0)</td>
                <td>–3.029</td>
                <td>.002</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table4fn1">
              <p><sup>a</sup>ARIMA: autoregressive integrated moving average.</p>
            </fn>
            <fn id="table4fn2">
              <p><sup>b</sup>FEV<sub>1</sub>: forced expiratory volume in 1 second.</p>
            </fn>
            <fn id="table4fn3">
              <p><sup>c</sup>CF: cystic fibrosis.</p>
            </fn>
            <fn id="table4fn4">
              <p><sup>d</sup>IV: intravenous.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <p>The results from fitting an ARIMA model for the number of comments with exogenous variables for the number of active users and the dummy variable to indicate the months of the COVID-19 pandemic can be seen in <xref ref-type="table" rid="table4">Table 4</xref>. For all series, all lags following the orders for the AR process were significant at an α level of .05.</p>
        <p>The last column in <xref ref-type="table" rid="table4">Table 4</xref> shows the <italic>P</italic> values for the COVID-19 dummy variable. Using an α level of .05, we say that the user activity for the data sets that show a <italic>P</italic> value less than .05 showed a significant difference in trend during the COVID-19 pandemic. There were 9 data sets: the denoised aggregate data set and 8 topic data sets that showed a significant change in trend. For the aggregate data set and topics “Trikafta and side effects,” “Gratitude,” “Lung transplants and respiration,” “Sweat testing and mutations,” “Medical facilities and professionals,” “Tobramycin side effects and alternatives,” and “CF inheritance and family planning,” there was a significant decrease in the number of comments during the COVID-19 period. In contrast, for the topic “Marijuana,” there was a significant increase in the number of comments during the period.</p>
        <p>We followed the same steps to check the significance of COVID-19 on the number of active users without any exogenous variables except for the dummy variable representing COVID-19 and found that COVID-19 was not statistically significant in the model. This implies that the trend in activity during the pandemic period was not significantly different from the trend outside this period.</p>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Results</title>
        <p>There is a mix of increased and decreased activity for topics that showed a significant difference in trend during the COVID-19 pandemic. This indicates a shift in attention or focus of discussion topics during the pandemic, despite an overall decrease in activity on the subreddit. In addition, our results also describe the time series processes that describe user activity for each topic. These models show how immediate or prolonged the predictive capacity of past activity is for future activity.</p>
        <p>The topics from our BERTopic model showed a variety of discussion interests within the cystic fibrosis community, ranging from medications, treatments, symptoms, finances, or living with their disease. By incorporating a dummy variable for the pandemic period, we were able to check the statistical significance of this period on the amount of user activity for each topic.</p>
        <p>Our study shows that there was an overall decrease in user activity during the pandemic. Considering that there was no statistically significant decrease in the number of active users during the pandemic, the decrease in user activity during this period can be attributed to other factors, such as other priorities that came about during the pandemic. Users could have been spending less time commenting on subreddits and instead focusing on gathering information about the pandemic, keeping up with changing health protocols, job security, or family members, to name a few. The decreased activity during this period did not automatically reflect in the individual topics, as evident in the increased activity for the topic “Marijuana.” In the succeeding subsections, we discuss the topics that underwent a significant change in trend during the pandemic and the possible reasons behind the change, as well as the varying ARIMA orders among the topics.</p>
      </sec>
      <sec>
        <title>Topics Significantly Affected by COVID-19</title>
        <p>Column “<italic>P</italic> value” in <xref ref-type="table" rid="table4">Table 4</xref> summarizes the significance testing for the COVID-19 dummy variable using an α level of .05. We interpret topics with a <italic>P</italic> value less than .05 for the dummy variable as showing a significant change in trend during the COVID-19 pandemic. In this section, we discuss the topics that showed a statistically significant difference in trends during the COVID-19 pandemic.</p>
        <p>The topic “Marijuana” showed top keywords such as “smoking,” “smoke,” “edibles,” and “weed.” Looking at some sample comments on this topic led us to conclude that the discussion is about the use of cannabis and alternative means of using cannabis aside from smoking (see <xref ref-type="table" rid="table2">Table 2</xref>). The rise in activity during this period could be due to the additional stresses brought about by the pandemic and, thus, the curiosity in exploring the use of medical marijuana. In Stephen et al’s [<xref ref-type="bibr" rid="ref28">28</xref>] study, they showed that side effects were rare and mild, should there be any at all, and that the use of medical marijuana was effective in relieving symptoms of stress and pain.</p>
        <p>A trickle-down effect on the overall negative effect on the aggregate data set could explain topics with a negative coefficient for COVID-19. The topics “Trikafta and side effects” and “Tobramycin side effects and alternatives” both talk about medications and could have lower activity because of other newer and more pressing matters during the pandemic that require more of their attention. In contrast, these medications have a more established database of information. Trikafta is an approved medication for patients with cystic fibrosis with the most common type of mutation that targets the underlying cause, while Tobramycin is an antibiotic that targets <italic>Pseudomonas aeruginosa</italic> infection in patients with cystic fibrosis [<xref ref-type="bibr" rid="ref29">29</xref>]. The decrease in activity for “Gratitude” could be explained by having fewer things for users to be grateful for during the period or due to fewer interactions overall. The topics “Lung transplants and respiration,” “Sweat testing and mutations,” and “Medical facilities and professionals” relate to hospital or clinic visitation; decreased activity could be explained by lower visitation rates due to limited hospital capacity or precautions in place for patients with cystic fibrosis to avoid getting COVID-19 [<xref ref-type="bibr" rid="ref11">11</xref>]. Lastly, for the topic “CF inheritance and family planning,” the decreased activity could be due to higher uncertainties about the future in light of the long pandemic period and lower interest in discussing this topic.</p>
      </sec>
      <sec>
        <title>Topics With Higher ARIMA Orders</title>
        <p>In this section, we discuss the interpretations for the ARIMA models used in each topic data set and give possible insights into their time series behavior. The ARIMA orders for the different data sets mostly follow an ARIMA(1,1,0) process, including the aggregate data set. This means that these series require 1 order of differencing to make the series stationary and can be described using an AR(1) process. An AR(1) process implies that the observation of the current time period can be described using observations from the previous time period. We can interpret this as users having shorter terms for engagement since only the previous timestep is descriptive of current activities. In contrast, topics that were modeled with a higher AR order imply a longer engagement of users since longer durations of user activity describe the current timestep.</p>
        <p>The topics “Dairy and calories” and “Diabetes” have the highest AR order, AR(4). The higher order can be explained by a longer duration of interest in discussions or a user following up on previous discussions. Compared to an AR(1) process, an AR(4) process means that activities from the previous 4 timesteps are descriptive of the activities in the current timestep. Both topics describe dietary concerns, which could explain the prolonged engagement since it takes time to observe changes or side effects from diet changes. The topic “Diabetes” is talking specifically about cystic fibrosis–related diabetes, which is the most common complication experienced by patients with cystic fibrosis, with around 50% of patients with cystic fibrosis developing cystic fibrosis–related diabetes by the age of 30; the high comorbidity rate could be another reason for the prolonged engagement. Additionally, the topic “Creon medication and diet” also relates to the diets of patients with cystic fibrosis and is modeled using AR(2), a higher order than that of most other topics. These topics relate to the nutrition and diet of patients with cystic fibrosis, which need to be carefully monitored to avoid complications and improve symptoms [<xref ref-type="bibr" rid="ref30">30</xref>-<xref ref-type="bibr" rid="ref33">33</xref>].</p>
        <p>The topic “Tobramycin side effects and alternatives” has a high AR order as well, AR(3). Tobramycin is an antibiotic used to treat <italic>P aeruginosa</italic> infections in patients with cystic fibrosis. This infection is highly prevalent among patients with cystic fibrosis, often results in chronic infections after the first infection, and is resistant to antibiotics [<xref ref-type="bibr" rid="ref34">34</xref>,<xref ref-type="bibr" rid="ref35">35</xref>]. This infection can drastically reduce the quality of life and life span. The seriousness of this infection and its resistance to antibiotics could explain the longer durations in discussing this topic.</p>
        <p>Interestingly, most of the topics modeled using AR(2) (5 out of 7) were related to respiration and common symptoms: “Nebulizers,” “Medical facilities and professionals,” “Sinuses and breathing,” “Air quality,” and “Airway clearance vests.” The higher AR order could be explained by longer discussions about symptoms, procedures used to alleviate symptoms, and the time it takes to notice effects or side effects. Interestingly, the topic “Birthdays” also follows an AR(2) process, where the long process could be due to prolonged celebrations of reaching another milestone [<xref ref-type="bibr" rid="ref36">36</xref>].</p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>Many of the comments were not classified into any topics, as evidenced by the 78,678 comments categorized as outliers in topic –1. This can be expected in social media, especially for short texts and quick exchanges between users; thus, these comments do not have enough contextual information for the model to classify them. Although some comments, on their own, may not talk about a specific topic, they may still contribute to the sentiment of the topic discussed in the parent comments or the main post.</p>
        <p>We chose BERTopic for its convenience and because it assigns each document to exactly 1 topic; however, we did not spend much time fine-tuning the model and tweaking its performance. We checked a few comments for each topic and saw that they were properly clustered with similar documents. While BERTopic has shown more robust performance in topic modeling of social media posts than other topic modeling methods [<xref ref-type="bibr" rid="ref37">37</xref>], topic modeling is still a fairly subjective process. The results we presented demonstrate the possibility of using topic modeling with time series analysis to gain insight into the needs of patients with rare diseases rather than to evaluate the performance of BERTopic as a topic model.</p>
        <p>Our definition of the dummy variable used to indicate the period of the pandemic was based on how long we believe the pandemic affected the number of comments, but the effects of the pandemic could have started later or could have ended sooner. We chose the year 2020 to capture the early influence of the pandemic when cases were starting to be reported and until the end of the year, when countries were starting to relax health protocols. Our time series modeling was also not extensive enough to perform the same procedure for each data set; we assumed that only the current lag for each topic’s data set is significant, following the behavior of the aggregate data set. The purpose of the analysis was to fit a model to the time series data and check for the statistical significance of an event rather than forecasting future values; thus, we no longer performed residual analysis to evaluate our models and instead observed the graph of the fitted values superimposed on the raw data. This allowed us to observe the broader trends across all the discussion topics rather than being restricted to only a small data set of topics.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>We focused on the cystic fibrosis community because of the active Reddit users in r/Cystic Fibrosis and because the symptoms of cystic fibrosis overlap with those of COVID-19. We found that during the events of the COVID-19 pandemic, there was an overall decrease in user activity. Our time series analysis accounting for the number of active users showed a mix of positive and negative effects from COVID-19, indicating a shift in focus in discussion topics during this period. Additionally, the varying orders of the ARIMA models among the topics indicate that the user activity for some topics has a longer predictive capacity than others. These topics do not necessarily reflect the needs or interests of patients with cystic fibrosis or patients with rare diseases more generally. Still, our results show what patients were concerned about in the dynamic interactions between patients on the subreddit, as well as how they were disrupted by the COVID-19 pandemic.</p>
        <p>We anticipate that this methodology and these results can be further developed and studied to be used in providing better care for patients with rare diseases by gaining a better understanding of their needs and how they are affected by external factors. In contrast with the traditional survey- or clinical-based methods, analyzing social media is much quicker while still providing useful insights into patients with rare diseases. Future research may consider extending this methodology to other rare disease subreddits, or even comparisons across multiple subreddits, as a quick and efficient source of information for the lived experiences of patients and their daily struggles and concerns.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group/>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">ACF</term>
          <def>
            <p>auto-correlation function</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">AR</term>
          <def>
            <p>autoregressive</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">ARIMA</term>
          <def>
            <p>autoregressive integrated moving average</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">MA</term>
          <def>
            <p>moving average</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">PACF</term>
          <def>
            <p>partial auto-correlation function</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>This work was supported in part by Japan Science and Technology Agency CREST grant number JPMJCR22N1, Japan; Ministry of Health, Labor and Welfare Program grant number JPMH21AC500111, Japan; and the Center for Robust Intelligence and Social Technology (CRIS), National Institute of Informatics and LINE (NII-LINE), Japan.</p>
    </ack>
    <notes>
      <sec>
        <title>Data Availability</title>
        <p>The data used in this study are available in our GitHub repository [<xref ref-type="bibr" rid="ref21">21</xref>].</p>
      </sec>
    </notes>
    <fn-group>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="web">
          <article-title>Pandemics that changed history: timeline</article-title>
          <source>History</source>
          <access-date>2022-12-13</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.history.com/topics/middle-ages/pandemics-timeline">https://www.history.com/topics/middle-ages/pandemics-timeline</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="web">
          <article-title>A brief history of Facebook</article-title>
          <source>The Guardian</source>
          <access-date>2022-12-13</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.theguardian.com/technology/2007/jul/25/media.newmedia">https://www.theguardian.com/technology/2007/jul/25/media.newmedia</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cinelli</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Quattrociocchi</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Galeazzi</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Valensise</surname>
              <given-names>CM</given-names>
            </name>
            <name name-style="western">
              <surname>Brugnoli</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Schmidt</surname>
              <given-names>AL</given-names>
            </name>
            <name name-style="western">
              <surname>Zola</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Zollo</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Scala</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>The COVID-19 social media infodemic</article-title>
          <source>Sci Rep</source>
          <year>2020</year>
          <volume>10</volume>
          <issue>1</issue>
          <fpage>16598</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41598-020-73510-5"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41598-020-73510-5</pub-id>
          <pub-id pub-id-type="medline">33024152</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41598-020-73510-5</pub-id>
          <pub-id pub-id-type="pmcid">PMC7538912</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>González-Padilla</surname>
              <given-names>DA</given-names>
            </name>
            <name name-style="western">
              <surname>Tortolero-Blanco</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Social media influence in the COVID-19 pandemic</article-title>
          <source>Int Braz J Urol</source>
          <year>2020</year>
          <volume>46</volume>
          <issue>suppl.1</issue>
          <fpage>120</fpage>
          <lpage>124</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.intbrazjurol.com.br/pdf/vol46S1/IBJU2020S121.pdf"/>
          </comment>
          <pub-id pub-id-type="doi">10.1590/S1677-5538.IBJU.2020.S121</pub-id>
          <pub-id pub-id-type="medline">32550706</pub-id>
          <pub-id pub-id-type="pii">IBJU2020S121</pub-id>
          <pub-id pub-id-type="pmcid">PMC7719982</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Haleem</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Javaid</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Vaishya</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Effects of COVID-19 pandemic in daily life</article-title>
          <source>Curr Med Res Pract</source>
          <year>2020</year>
          <volume>10</volume>
          <issue>2</issue>
          <fpage>78</fpage>
          <lpage>79</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/32292804"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.cmrp.2020.03.011</pub-id>
          <pub-id pub-id-type="medline">32292804</pub-id>
          <pub-id pub-id-type="pii">S2352-0817(20)30038-6</pub-id>
          <pub-id pub-id-type="pmcid">PMC7147210</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="web">
          <article-title>COVID-19: social media use goes up as country stays indoors</article-title>
          <source>Victoria News</source>
          <access-date>2022-12-13</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.vicnews.com/news/covid-19-social-media-use-goes-up-as-country-stays-indoors/">https://www.vicnews.com/news/covid-19-social-media-use-goes-up-as-country-stays-indoors/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="web">
          <article-title>Symptoms of COVID-19</article-title>
          <source>Centers for Disease Control and Prevention</source>
          <access-date>2022-12-13</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.cdc.gov/coronavirus/2019-ncov/symptoms-testing/symptoms.html">https://www.cdc.gov/coronavirus/2019-ncov/symptoms-testing/symptoms.html</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Peckham</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>McDermott</surname>
              <given-names>MF</given-names>
            </name>
            <name name-style="western">
              <surname>Savic</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Mehta</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>COVID-19 meets cystic fibrosis: for better or worse?</article-title>
          <source>Genes Immun</source>
          <year>2020</year>
          <volume>21</volume>
          <issue>4</issue>
          <fpage>260</fpage>
          <lpage>262</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://eprints.whiterose.ac.uk/162784/"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41435-020-0103-y</pub-id>
          <pub-id pub-id-type="medline">32606316</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41435-020-0103-y</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="web">
          <article-title>Cystic fibrosis: what is cystic fibrosis?</article-title>
          <source>National Heart, Lung, and Blood Institute</source>
          <access-date>2022-12-13</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.nhlbi.nih.gov/health/cystic-fibrosis">https://www.nhlbi.nih.gov/health/cystic-fibrosis</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="web">
          <article-title>Cystic fibrosis: causes</article-title>
          <source>National Heart, Lung, and Blood Institute</source>
          <access-date>2022-12-13</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.nhlbi.nih.gov/health/cystic-fibrosis/causes">https://www.nhlbi.nih.gov/health/cystic-fibrosis/causes</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Colombo</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Burgel</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Gartner</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>van Koningsbruggen-Rietschel</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Naehrlich</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Sermet-Gaudelus</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Southern</surname>
              <given-names>KW</given-names>
            </name>
          </person-group>
          <article-title>Impact of COVID-19 on people with cystic fibrosis</article-title>
          <source>Lancet Respir Med</source>
          <year>2020</year>
          <volume>8</volume>
          <issue>5</issue>
          <fpage>e35</fpage>
          <lpage>e36</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://air.unimi.it/handle/2434/736790"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/S2213-2600(20)30177-6</pub-id>
          <pub-id pub-id-type="medline">32304639</pub-id>
          <pub-id pub-id-type="pii">S2213-2600(20)30177-6</pub-id>
          <pub-id pub-id-type="pmcid">PMC7159857</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Stoller</surname>
              <given-names>JK</given-names>
            </name>
          </person-group>
          <article-title>The challenge of rare diseases</article-title>
          <source>Chest</source>
          <year>2018</year>
          <volume>153</volume>
          <issue>6</issue>
          <fpage>1309</fpage>
          <lpage>1314</lpage>
          <pub-id pub-id-type="doi">10.1016/j.chest.2017.12.018</pub-id>
          <pub-id pub-id-type="medline">29325986</pub-id>
          <pub-id pub-id-type="pii">S0012-3692(18)30064-3</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ashtari</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Taylor</surname>
              <given-names>AD</given-names>
            </name>
          </person-group>
          <article-title>The internet knows more than my physician: qualitative interview study of people with rare diseases and how they use online support groups</article-title>
          <source>J Med Internet Res</source>
          <year>2022</year>
          <volume>24</volume>
          <issue>8</issue>
          <fpage>e39172</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2022/8/e39172/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/39172</pub-id>
          <pub-id pub-id-type="medline">36006679</pub-id>
          <pub-id pub-id-type="pii">v24i8e39172</pub-id>
          <pub-id pub-id-type="pmcid">PMC9459833</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Miller</surname>
              <given-names>EG</given-names>
            </name>
            <name name-style="western">
              <surname>Woodward</surname>
              <given-names>AL</given-names>
            </name>
            <name name-style="western">
              <surname>Flinchum</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Young</surname>
              <given-names>JL</given-names>
            </name>
            <name name-style="western">
              <surname>Tabor</surname>
              <given-names>HK</given-names>
            </name>
            <name name-style="western">
              <surname>Halley</surname>
              <given-names>MC</given-names>
            </name>
          </person-group>
          <article-title>Opportunities and pitfalls of social media research in rare genetic diseases: a systematic review</article-title>
          <source>Genet Med</source>
          <year>2021</year>
          <volume>23</volume>
          <issue>12</issue>
          <fpage>2250</fpage>
          <lpage>2259</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1098-3600(21)05446-0"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41436-021-01273-z</pub-id>
          <pub-id pub-id-type="medline">34282302</pub-id>
          <pub-id pub-id-type="pii">S1098-3600(21)05446-0</pub-id>
          <pub-id pub-id-type="pmcid">PMC8720387</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Davies</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>Insights into rare diseases from social media surveys</article-title>
          <source>Orphanet J Rare Dis</source>
          <year>2016</year>
          <volume>11</volume>
          <issue>1</issue>
          <fpage>151</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://ojrd.biomedcentral.com/articles/10.1186/s13023-016-0532-x"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s13023-016-0532-x</pub-id>
          <pub-id pub-id-type="medline">27829465</pub-id>
          <pub-id pub-id-type="pii">10.1186/s13023-016-0532-x</pub-id>
          <pub-id pub-id-type="pmcid">PMC5103451</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Foufi</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Timakum</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Gaudet-Blavignac</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Lovis</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Song</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Mining of textual health information from reddit: analysis of chronic diseases with extracted entities and their relations</article-title>
          <source>J Med Internet Res</source>
          <year>2019</year>
          <volume>21</volume>
          <issue>6</issue>
          <fpage>e12876</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2019/6/e12876/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/12876</pub-id>
          <pub-id pub-id-type="medline">31199327</pub-id>
          <pub-id pub-id-type="pii">v21i6e12876</pub-id>
          <pub-id pub-id-type="pmcid">PMC6595941</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Leung</surname>
              <given-names>YT</given-names>
            </name>
            <name name-style="western">
              <surname>Khalvati</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Exploring COVID-19-related stressors: topic modeling study</article-title>
          <source>J Med Internet Res</source>
          <year>2022</year>
          <volume>24</volume>
          <issue>7</issue>
          <fpage>e37142</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2022/7/e37142/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/37142</pub-id>
          <pub-id pub-id-type="medline">35731966</pub-id>
          <pub-id pub-id-type="pii">v24i7e37142</pub-id>
          <pub-id pub-id-type="pmcid">PMC9285672</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Sundstrom</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Better understand rare disease patients’ needs by analyzing social media data – a case study of cystic fibrosis</article-title>
          <year>2021</year>
          <conf-name>2021 IEEE International Conference on Bioinformatics and Biomedicine (BIBM)</conf-name>
          <conf-date>December 9-12, 2021</conf-date>
          <conf-loc>Houston, Texas</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>De</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Feng</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Yue</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Yao</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Analyzing patient secure messages using a fast health care interoperability resources (FIHR)-based data model: development and topic modeling study</article-title>
          <source>J Med Internet Res</source>
          <year>2021</year>
          <volume>23</volume>
          <issue>7</issue>
          <fpage>e26770</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2021/7/e26770/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/26770</pub-id>
          <pub-id pub-id-type="medline">34328444</pub-id>
          <pub-id pub-id-type="pii">v23i7e26770</pub-id>
          <pub-id pub-id-type="pmcid">PMC8367168</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ni Ki</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Hosseinian‐Far</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Daneshkhah</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Salari</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>Topic modelling in precision medicine with its applications in personalized diabetes management</article-title>
          <source>Expert Systems</source>
          <year>2021</year>
          <volume>39</volume>
          <issue>4</issue>
          <fpage>e12774</fpage>
          <pub-id pub-id-type="doi">10.1111/exsy.12774</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="web">
          <article-title>Reddit_topics_time_series-JMIR</article-title>
          <source>GitHub</source>
          <access-date>2023-02-01</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://github.com/lfyao/Reddit_Topics_Time_Series-JMIR">https://github.com/lfyao/Reddit_Topics_Time_Series-JMIR</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Grootendorst</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>BERTopic: neural topic modeling with a class-based TF-IDF procedure</article-title>
          <source>ArXiv. Preprint published online on March 11, 2022</source>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2203.05794"/>
          </comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.2203.05794</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Armstrong</surname>
              <given-names>George</given-names>
            </name>
            <name name-style="western">
              <surname>Martino</surname>
              <given-names>Cameron</given-names>
            </name>
            <name name-style="western">
              <surname>Rahman</surname>
              <given-names>Gibraan</given-names>
            </name>
            <name name-style="western">
              <surname>Gonzalez</surname>
              <given-names>Antonio</given-names>
            </name>
            <name name-style="western">
              <surname>Vázquez-Baeza</surname>
              <given-names>Yoshiki</given-names>
            </name>
            <name name-style="western">
              <surname>Mishne</surname>
              <given-names>Gal</given-names>
            </name>
            <name name-style="western">
              <surname>Knight</surname>
              <given-names>Rob</given-names>
            </name>
          </person-group>
          <article-title>Uniform Manifold Approximation and Projection (UMAP) reveals composite patterns and resolves visualization artifacts in microbiome data</article-title>
          <source>mSystems</source>
          <year>2021</year>
          <month>10</month>
          <day>26</day>
          <volume>6</volume>
          <issue>5</issue>
          <fpage>e0069121</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://journals.asm.org/doi/10.1128/mSystems.00691-21?url_ver=Z39.88-2003&#38;rfr_id=ori:rid:crossref.org&#38;rfr_dat=cr_pub%20%200pubmed"/>
          </comment>
          <pub-id pub-id-type="doi">10.1128/mSystems.00691-21</pub-id>
          <pub-id pub-id-type="medline">34609167</pub-id>
          <pub-id pub-id-type="pmcid">PMC8547469</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pedregosa</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Varoquaux</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Gramfort</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Michel</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Thirion</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Grisel</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Blondel</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Prettenhofer</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Scikit-learn: machine learning in Python</article-title>
          <source>J Mach Learn Res</source>
          <year>2011</year>
          <volume>12</volume>
          <fpage>2825</fpage>
          <lpage>2830</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://jmlr.csail.mit.edu/papers/v12/pedregosa11a.html"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Montani</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Honnibal</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Van Landeghem</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Boyd</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Peters</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>O'Leary McCann</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Geovedi</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>explosion/spaCy: v3.5.1: spancat for multi-class labeling, fixes for textcat+transformers and more</article-title>
          <source>Zenodo</source>
          <year>2023</year>
          <month>03</month>
          <day>10</day>
          <access-date>2023-04-11</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://zenodo.org/record/7715077#.ZDWUgezML9E">https://zenodo.org/record/7715077#.ZDWUgezML9E</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wei</surname>
              <given-names>WWS</given-names>
            </name>
          </person-group>
          <source>Time Series Analysis: Univariate and Multivariate Methods, 2nd ed</source>
          <year>2006</year>
          <publisher-loc>Boston</publisher-loc>
          <publisher-name>Pearson / Addison Wesley</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="web">
          <article-title>CDC museum COVID-19 timeline</article-title>
          <source>Centers for Disease Control and Prevention</source>
          <access-date>2022-12-13</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.cdc.gov/museum/timeline/covid19.html">https://www.cdc.gov/museum/timeline/covid19.html</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Stephen</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Chowdhury</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Tejada</surname>
              <given-names>LA</given-names>
            </name>
            <name name-style="western">
              <surname>Zanni</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Hadjiliadis</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Use of medical marijuana in cystic fibrosis patients</article-title>
          <source>BMC Complement Med Ther</source>
          <year>2020</year>
          <volume>20</volume>
          <issue>1</issue>
          <fpage>1</fpage>
          <lpage>5</lpage>
          <pub-id pub-id-type="doi">10.1186/s12906-020-03116-x</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hagerman</surname>
              <given-names>JK</given-names>
            </name>
            <name name-style="western">
              <surname>Knechtel</surname>
              <given-names>SA</given-names>
            </name>
            <name name-style="western">
              <surname>Klepser</surname>
              <given-names>ME</given-names>
            </name>
          </person-group>
          <article-title>Tobramycin solution for inhalation in cystic fibrosis patients: a review of the literature</article-title>
          <source>Expert Opin Pharmacother</source>
          <year>2007</year>
          <volume>8</volume>
          <issue>4</issue>
          <fpage>467</fpage>
          <lpage>475</lpage>
          <pub-id pub-id-type="doi">10.1517/14656566.8.4.467</pub-id>
          <pub-id pub-id-type="medline">17309341</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>O'Riordan</surname>
              <given-names>SM</given-names>
            </name>
            <name name-style="western">
              <surname>Dattani</surname>
              <given-names>MT</given-names>
            </name>
            <name name-style="western">
              <surname>Hindmarsh</surname>
              <given-names>PC</given-names>
            </name>
          </person-group>
          <article-title>Cystic fibrosis-related diabetes in childhood</article-title>
          <source>Horm Res Paediatr</source>
          <year>2010</year>
          <volume>73</volume>
          <issue>1</issue>
          <fpage>15</fpage>
          <lpage>24</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.karger.com?DOI=10.1159/000271912"/>
          </comment>
          <pub-id pub-id-type="doi">10.1159/000271912</pub-id>
          <pub-id pub-id-type="medline">20190536</pub-id>
          <pub-id pub-id-type="pii">000271912</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Strandvik</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Nutrition in cystic fibrosis—some notes on the fat recommendations</article-title>
          <source>Nutrients</source>
          <year>2022</year>
          <volume>14</volume>
          <issue>4</issue>
          <fpage>853</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.mdpi.com/resolver?pii=nu14040853"/>
          </comment>
          <pub-id pub-id-type="doi">10.3390/nu14040853</pub-id>
          <pub-id pub-id-type="medline">35215502</pub-id>
          <pub-id pub-id-type="pii">nu14040853</pub-id>
          <pub-id pub-id-type="pmcid">PMC8875685</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nestel</surname>
              <given-names>PJ</given-names>
            </name>
            <name name-style="western">
              <surname>Mellett</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Pally</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Wong</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Barlow</surname>
              <given-names>CK</given-names>
            </name>
            <name name-style="western">
              <surname>Croft</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Mori</surname>
              <given-names>TA</given-names>
            </name>
            <name name-style="western">
              <surname>Meikle</surname>
              <given-names>PJ</given-names>
            </name>
          </person-group>
          <article-title>Effects of low-fat or full-fat fermented and non-fermented dairy foods on selected cardiovascular biomarkers in overweight adults</article-title>
          <source>Br J Nutr</source>
          <year>2013</year>
          <volume>110</volume>
          <issue>12</issue>
          <fpage>2242</fpage>
          <lpage>2249</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://research-repository.uwa.edu.au/en/publications/effects-of-low-fat-or-full-fat-fermented-and-non-fermented-dairy-"/>
          </comment>
          <pub-id pub-id-type="doi">10.1017/S0007114513001621</pub-id>
          <pub-id pub-id-type="medline">23756569</pub-id>
          <pub-id pub-id-type="pii">S0007114513001621</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="web">
          <article-title>EPI diet and lifestyle information</article-title>
          <source>CREON (Pancrelipase)</source>
          <access-date>2022-12-13</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.creoninfo.com/diet-and-lifestyle">https://www.creoninfo.com/diet-and-lifestyle</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Malhotra</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Hayes</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Wozniak</surname>
              <given-names>DJ</given-names>
            </name>
          </person-group>
          <article-title>Cystic fibrosis and Pseudomonas aeruginosa: the host-microbe interface</article-title>
          <source>Clin Microbiol Rev</source>
          <year>2019</year>
          <volume>32</volume>
          <issue>3</issue>
          <fpage>e00138-18</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/31142499"/>
          </comment>
          <pub-id pub-id-type="doi">10.1128/CMR.00138-18</pub-id>
          <pub-id pub-id-type="medline">31142499</pub-id>
          <pub-id pub-id-type="pii">32/3/e00138-18</pub-id>
          <pub-id pub-id-type="pmcid">PMC6589863</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lund-Palau</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Turnbull</surname>
              <given-names>AR</given-names>
            </name>
            <name name-style="western">
              <surname>Bush</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Bardin</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Cameron</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Soren</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Wierre-Gore</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Alton</surname>
              <given-names>EW</given-names>
            </name>
            <name name-style="western">
              <surname>Bundy</surname>
              <given-names>JG</given-names>
            </name>
            <name name-style="western">
              <surname>Connett</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Faust</surname>
              <given-names>SN</given-names>
            </name>
            <name name-style="western">
              <surname>Filloux</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Freemont</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Jones</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Khoo</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Morales</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Murphy</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Pabary</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Simbo</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Schelenz</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Takats</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Webb</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Williams</surname>
              <given-names>HD</given-names>
            </name>
            <name name-style="western">
              <surname>Davies</surname>
              <given-names>JC</given-names>
            </name>
          </person-group>
          <article-title>Pseudomonas aeruginosa infection in cystic fibrosis: pathophysiological mechanisms and therapeutic approaches</article-title>
          <source>Expert Rev Respir Med</source>
          <year>2016</year>
          <volume>10</volume>
          <issue>6</issue>
          <fpage>685</fpage>
          <lpage>697</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://hdl.handle.net/10044/1/38770"/>
          </comment>
          <pub-id pub-id-type="doi">10.1080/17476348.2016.1177460</pub-id>
          <pub-id pub-id-type="medline">27175979</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>McBennett</surname>
              <given-names>KA</given-names>
            </name>
            <name name-style="western">
              <surname>Davis</surname>
              <given-names>PB</given-names>
            </name>
            <name name-style="western">
              <surname>Konstan</surname>
              <given-names>MW</given-names>
            </name>
          </person-group>
          <article-title>Increasing life expectancy in cystic fibrosis: advances and challenges</article-title>
          <source>Pediatr Pulmonol</source>
          <year>2022</year>
          <volume>57</volume>
          <issue>Suppl 1</issue>
          <fpage>S5</fpage>
          <lpage>S12</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/34672432"/>
          </comment>
          <pub-id pub-id-type="doi">10.1002/ppul.25733</pub-id>
          <pub-id pub-id-type="medline">34672432</pub-id>
          <pub-id pub-id-type="pmcid">PMC9004282</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Egger</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>A topic modeling comparison between LDA, NMF, Top2Vec, and BERTopic to demystify Twitter posts</article-title>
          <source>Front Sociol</source>
          <year>2022</year>
          <volume>7</volume>
          <fpage>65</fpage>
          <pub-id pub-id-type="doi">10.3389/fsoc.2022.886498</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
