<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMIR</journal-id>
      <journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id>
      <journal-title>Journal of Medical Internet Research</journal-title>
      <issn pub-type="epub">1438-8871</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v24i7e29056</article-id>
      <article-id pub-id-type="pmid">35852835</article-id>
      <article-id pub-id-type="doi">10.2196/29056</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Use of Multiple Correspondence Analysis and K-means to Explore Associations Between Risk Factors and Likelihood of Colorectal Cancer: Cross-sectional Study</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Mavragani</surname>
            <given-names>Amaryllis</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Moore</surname>
            <given-names>Candace Makeda</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Mohammadi</surname>
            <given-names>Esmaeil</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Chu</surname>
            <given-names>Yuanchia</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Espinosa-Leal</surname>
            <given-names>Leonardo</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Florensa</surname>
            <given-names>Dídac</given-names>
          </name>
          <degrees>MPH</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Department of Computer Science</institution>
            <institution>University of Lleida</institution>
            <addr-line>Jaume II, 69</addr-line>
            <addr-line>Lleida, 25001</addr-line>
            <country>Spain</country>
            <phone>34 973 70 27 00</phone>
            <email>didac.florensa@gencat.cat</email>
          </address>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-0743-6512</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Mateo-Fornés</surname>
            <given-names>Jordi</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-1660-0380</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Solsona</surname>
            <given-names>Francesc</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-4830-9184</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Pedrol Aige</surname>
            <given-names>Teresa</given-names>
          </name>
          <degrees>MPH</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-5695-7149</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Mesas Julió</surname>
            <given-names>Miquel</given-names>
          </name>
          <degrees>MPH</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-0962-5205</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author">
          <name name-style="western">
            <surname>Piñol</surname>
            <given-names>Ramon</given-names>
          </name>
          <degrees>BSc</degrees>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-1467-9416</ext-link>
        </contrib>
        <contrib id="contrib7" contrib-type="author">
          <name name-style="western">
            <surname>Godoy</surname>
            <given-names>Pere</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff5" ref-type="aff">5</xref>
          <xref rid="aff6" ref-type="aff">6</xref>
          <xref rid="aff7" ref-type="aff">7</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-2896-7286</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Department of Computer Science</institution>
        <institution>University of Lleida</institution>
        <addr-line>Lleida</addr-line>
        <country>Spain</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Department of Computer Systems</institution>
        <institution>Santa Maria University Hospital</institution>
        <addr-line>Lleida</addr-line>
        <country>Spain</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Hospital-based Cancer Registry</institution>
        <institution>Arnau de Vilanova University Hospital</institution>
        <addr-line>Lleida</addr-line>
        <country>Spain</country>
      </aff>
      <aff id="aff4">
        <label>4</label>
        <institution>Catalan Health Service</institution>
        <institution>Department of Health</institution>
        <addr-line>Lleida</addr-line>
        <country>Spain</country>
      </aff>
      <aff id="aff5">
        <label>5</label>
        <institution>Biomedical Institute Research of Lleida</institution>
        <addr-line>Lleida</addr-line>
        <country>Spain</country>
      </aff>
      <aff id="aff6">
        <label>6</label>
        <institution>Centro de Investigación Biomédica en Red</institution>
        <addr-line>Madrid</addr-line>
        <country>Spain</country>
      </aff>
      <aff id="aff7">
        <label>7</label>
        <institution>Santa Maria University Hospital</institution>
        <institution>Population Cancer Registry</institution>
        <addr-line>Lleida</addr-line>
        <country>Spain</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Dídac Florensa <email>didac.florensa@gencat.cat</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>7</month>
        <year>2022</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>19</day>
        <month>7</month>
        <year>2022</year>
      </pub-date>
      <volume>24</volume>
      <issue>7</issue>
      <elocation-id>e29056</elocation-id>
      <history>
        <date date-type="received">
          <day>25</day>
          <month>3</month>
          <year>2021</year>
        </date>
        <date date-type="rev-request">
          <day>28</day>
          <month>12</month>
          <year>2021</year>
        </date>
        <date date-type="rev-recd">
          <day>22</day>
          <month>2</month>
          <year>2022</year>
        </date>
        <date date-type="accepted">
          <day>23</day>
          <month>5</month>
          <year>2022</year>
        </date>
      </history>
      <copyright-statement>©Dídac Florensa, Jordi Mateo-Fornés, Francesc Solsona, Teresa Pedrol Aige, Miquel Mesas Julió, Ramon Piñol, Pere Godoy. Originally published in the Journal of Medical Internet Research (https://www.jmir.org), 19.07.2022.</copyright-statement>
      <copyright-year>2022</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research, is properly cited. The complete bibliographic information, a link to the original publication on https://www.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://www.jmir.org/2022/7/e29056" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Previous works have shown that risk factors are associated with an increased likelihood of colorectal cancer.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>The purpose of this study was to detect these associations in the region of Lleida (Catalonia) by using multiple correspondence analysis (MCA) and k-means.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>This cross-sectional study was made up of 1083 colorectal cancer episodes between 2012 and 2015, extracted from the population-based cancer registry for the province of Lleida (Spain), the Primary Care Centers database, and the Catalan Health Service Register. The data set included risk factors such as smoking and BMI as well as sociodemographic information and tumor details. The relations between the risk factors and patient characteristics were identified using MCA and k-means.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>The combination of these techniques helps to detect clusters of patients with similar risk factors. Risk of death is associated with being elderly and with obesity or being overweight. Stage III cancer is associated with people aged ≥65 years and rural/semiurban populations, while younger people were associated with stage 0.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>MCA and k-means were significantly useful for detecting associations between risk factors and patient characteristics. These techniques have proven to be effective tools for analyzing the incidence of some factors in colorectal cancer. The outcomes obtained help corroborate suspected trends and stimulate the use of these techniques for finding the association of risk factors with the incidence of other cancers.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>colorectal cancer</kwd>
        <kwd>cancer registry</kwd>
        <kwd>multiple correspondence analysis</kwd>
        <kwd>k-means</kwd>
        <kwd>risk factors</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>Colorectal cancer is the third most common type of cancer worldwide [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref2">2</xref>]. In Europe, around 250,000 new colorectal cancer cases are diagnosed each year, accounting for around 9% of all malignancies. The rates of this cancer increase with industrialization and urbanization. In general, the evidence shows that the incidence increases in countries where the overall risk of large bowel cancer is low, while in countries with high incidence, the rate has either stabilized or decreased, particularly among younger age groups [<xref ref-type="bibr" rid="ref3">3</xref>].</p>
      <p>In the province of Lleida (Spain), the population-based cancer registries allow the identification and counting of all incident cases (new cases) diagnosed among the residents of this geographical area [<xref ref-type="bibr" rid="ref4">4</xref>]. The residents of the Lleida region present lifestyles, risk factors, and work activity, which can be used to determine the specific incidence of certain types of cancer. Nearly half the population of the Lleida province live in rural and semiurban areas. As a consequence, their lifestyle is different from that of the more urban populations in other Catalan provinces [<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref6">6</xref>]. Thus, they can present different risk factors and socioeconomic status (SES).</p>
      <p>Some studies have demonstrated a higher incidence of colorectal cancer among those with low SES and risk factors such as BMI and smoking. A pooled European cohort study [<xref ref-type="bibr" rid="ref7">7</xref>] demonstrated that adult weight gain was associated with increased risk of several major cancers. They also concluded that the degree, timing, and duration of being overweight and obesity also seemed to be important. More specifically for colon cancer, Guo et al [<xref ref-type="bibr" rid="ref8">8</xref>] presented a prospective cohort study in northern China. They concluded that obesity increased the risk of colon cancer in males. Regarding smoking, Mizoue et al [<xref ref-type="bibr" rid="ref9">9</xref>] presented a report evaluating the association in the Japanese population based on a systematic review of epidemiological evidence. This report concluded that tobacco smoking may increase the risk of colorectal cancer in the Japanese population. However, there is still insufficient epidemiological evidence to demonstrate any clear association with colon cancer. Kim et al [<xref ref-type="bibr" rid="ref10">10</xref>] studied a possible association between SES and the risk of colorectal cancer in women. Their findings suggested that high SES may protect against colorectal cancer in women. The methodology used in these studies was similar, namely, the multivariate regression analysis.</p>
      <p>Recent research has applied the techniques used in this study, but none of these studies were for cancer and risk factors. Ugurlu and Cicek [<xref ref-type="bibr" rid="ref11">11</xref>] used the multiple correspondence analysis (MCA) method to search for relations in ship collisions. However, the k-means algorithm has been used more widely in some aspects of cancer research. Rustam et al [<xref ref-type="bibr" rid="ref12">12</xref>] applied this technique to obtain the centroid of each cluster and predict the class of every data point in the validation set. Recently, Ronen et al [<xref ref-type="bibr" rid="ref13">13</xref>] used k-means as an initial step in a deep learning method to evaluate the colorectal cancer subtypes. K-means allowed the detection of relevant clinical patterns that improved the prediction model. Therefore, the use of MCA and k-means to search for the relationship between risk factors and cancer incidence is a novel method.</p>
      <p>Several studies [<xref ref-type="bibr" rid="ref7">7</xref>-<xref ref-type="bibr" rid="ref10">10</xref>] have found new associations among risk factors, demographic information, and SES in patients with colorectal cancer. These studies have made a great effort to analyze and compare risk factors such as obesity, cigarette smoking, and SES in patients with colorectal cancer. They used statistical methods, including Cox regression, Spearman rank correlation coefficient, and multilevel logistic regression to estimate the association between variables. However, none of them used a combination of a statistical method like MCA and an artificial intelligence algorithm such as k-means to search for associations between a group of categorical variables.</p>
      <p>As the main contribution of this study, we propose the use of MCA as a statistical technique to detect relations between risk factors and patients’ characteristics and k-means as an unsupervised learning algorithm to search for clusters of patients with similar risk factor profiles for colorectal cancer.</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Preprocessing</title>
        <p>The main information sources were the population-based cancer registry of the health region of the province of Lleida, the eCAP (a computerized medical history program used by doctors, pediatricians, and nurses in primary care centers when they see their patients [<xref ref-type="bibr" rid="ref14">14</xref>]) software, and the Central Register of Insured Persons (a register that allows the unique identification of those covered by the Catalan Health Service through the personal identification code, the management and consultation of their data, and their updates [<xref ref-type="bibr" rid="ref15">15</xref>]). Before applying the statistical technique, the information was validated by experienced professionals (doctors, nurses, and documentalists) in the Lleida population-based cancer registry who reviewed the clinical history of each patient. After that, the International Agency for Research on Cancer tool was applied to detect unlikely or impossible codes or combinations of codes [<xref ref-type="bibr" rid="ref16">16</xref>]. An accurate description of the data and the basic concepts of MCA and k-means used in this work are given in this section. See the system flow chart of the whole process in <xref rid="figure1" ref-type="fig">Figure 1</xref>; it shows the different registers used to extract the data, its process and transformation, and its applied analysis. The patients with empty fields were removed.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>System flow chart. CRIP: Central Register of Insured Persons; PCR: population-based cancer registry.</p>
          </caption>
          <graphic xlink:href="jmir_v24i7e29056_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Study Population</title>
        <p>The colorectal cancer data were extracted from the new cases registered between 2012 and 2015 in the Lleida population-based cancer registry [<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref17">17</xref>,<xref ref-type="bibr" rid="ref18">18</xref>] for patients with cancer in the main hospitals in the health care region of the Lleida province. Specifically, the data set consisted of 1083 new colorectal cancer cases. These hospitals were the Arnau de Vilanova University Hospital and the Santa Maria University Hospital, and the primary information sources were hospital records (International Classification of Diseases, ninth revision codes 140.0 to 208.9) and reports from pathological anatomy. Additionally, these reports confirmed &#62;92% of cases included in the sample. Risk factors such as BMI and smoking were extracted from eCAP software and the SES was extracted from the Central Register of Insured Persons. The study is compliant with the General Data Protection Regulation (European Union), thereby maintaining the anonymity of the patients. Cancer episodes were recorded according to international criteria. In addition, the data analysis (done with R) can be freely downloaded from this GitHub repository [<xref ref-type="bibr" rid="ref19">19</xref>]. The repository also includes a mock data set randomly generated to test the models. The original data set could not be uploaded due to General Data Protection Regulation, which does not permit sharing patients’ information.</p>
        <p>The BMI was used to calculate the obesity of each patient by standard weight status categories [<xref ref-type="bibr" rid="ref20">20</xref>]. We categorized the BMI according to the established table: ≤24.9 as normal weight, 25-29.9 as overweight, and ≥30 as obesity. Regarding SES, we categorized the variable according to the annual income available from the Central Register of Insured Persons. According to the legislation [<xref ref-type="bibr" rid="ref21">21</xref>], we created 2 groups: annual income &#60;€18,000 (low income) and &#62;€18,000 (high income) (€1=US $1.04). The population was categorized as rural, semiurban, and urban. In accordance with [<xref ref-type="bibr" rid="ref22">22</xref>], people living in cities with a population of more than 10,000 were classified as urban, those living in towns with a population between 2000 and 10,000 as semiurban, and the rest as rural. The Spanish National Statistics Institute has defined rural areas as those with a population of less than 2000, semiurban areas as those with a population between 2001 and 10,000, and urban areas as those with a population of more than 10,000 people. All the cancer cases that did not conform to one of these fields were discarded automatically. To sum up, each register contains the following fields: age group (50-64 years, 65-74 years, ≥75 years); gender (male, female); population (rural, semiurban, urban); exitus (death, alive); BMI (normal, overweight, obesity); smoking (ex-smoker/smoker, nonsmoker); income (high income, low income); and stage (0, I, II, III, undefined). <xref ref-type="table" rid="table1">Table 1</xref> shows the number of cases for each category.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Principal comorbidity groups included in this study: patients with colorectal cancer between 2012 and 2015, where all the comorbidities were properly registered (N=1083).</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="770"/>
            <col width="0"/>
            <col width="200"/>
            <thead>
              <tr valign="top">
                <td colspan="3">Characteristics</td>
                <td>Values, n (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="4">
                  <bold>Gender</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Male</td>
                <td colspan="2">689 (63.6)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Female</td>
                <td colspan="2">394 (36.4)</td>
              </tr>
              <tr valign="top">
                <td colspan="4">
                  <bold>Age group (years)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>50-64</td>
                <td colspan="2">319 (29.5)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>65-74</td>
                <td colspan="2">328 (30.3)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>≥75</td>
                <td colspan="2">436 (40.2)</td>
              </tr>
              <tr valign="top">
                <td colspan="4">
                  <bold>Exitus</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Death</td>
                <td colspan="2">221 (20.4)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Alive</td>
                <td colspan="2">862 (79.6)</td>
              </tr>
              <tr valign="top">
                <td colspan="4">
                  <bold>Income<sup>a</sup></bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>&#60;€18,000/year</td>
                <td colspan="2">863 (79.7)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>&#62;€18,000/year</td>
                <td colspan="2">220 (20.3)</td>
              </tr>
              <tr valign="top">
                <td colspan="4">
                  <bold>Population</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Rural</td>
                <td colspan="2">228 (21.1)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Semiurban</td>
                <td colspan="2">333 (30.7)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Urban</td>
                <td colspan="2">522 (48.2)</td>
              </tr>
              <tr valign="top">
                <td colspan="4">
                  <bold>BMI</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Normal</td>
                <td colspan="2">234 (21.6)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Overweight</td>
                <td colspan="2">506 (46.7)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Obesity</td>
                <td colspan="2">343 (31.7)</td>
              </tr>
              <tr valign="top">
                <td colspan="4">
                  <bold>Smoker</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Smoker/Ex-smoker</td>
                <td colspan="2">232 (21.4)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Nonsmoker</td>
                <td colspan="2">851 (78.6)</td>
              </tr>
              <tr valign="top">
                <td colspan="4">
                  <bold>Stage</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>0</td>
                <td colspan="2">64 (5.9)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>I</td>
                <td colspan="2">115 (10.6)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>II</td>
                <td colspan="2">168 (15.5)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>III</td>
                <td colspan="2">91 (8.4)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Undefined</td>
                <td colspan="2">645 (59.6)</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>€1=US $1.04.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>MCA Algorithm</title>
        <p>MCA is an unsupervised learning algorithm for visualizing the patterns in large and multidimensional categorical data [<xref ref-type="bibr" rid="ref23">23</xref>]. This method can be used to analyze, explore, summarize, and visualize information contained in a data set of individuals described by categorical variables [<xref ref-type="bibr" rid="ref24">24</xref>]. Unlike correspondence analysis (CA), MCA can deal with more than two categorical variables. This is the main advantage of the MCA technique. In our case, MCA was first used to evaluate the relationships between all the features. MCA was then used to evaluate the relationships among population, age, gender, exitus, BMI, smoking, and tumor stage. Associations between features are represented graphically [<xref ref-type="bibr" rid="ref25">25</xref>]. The graphs aim to visualize the similarities or differences in the profiles simultaneously, identifying those dimensions that contain most of the data variability. Features or their categories close to each other are significantly related statistically.</p>
        <p>The factors were interpreted with the help of various statistical coefficients, which complemented each other to provide a better interpretation. The most common and important are inertia, eigenvalue, contribution, and factorial coordinates. Inertia is a measurement of the dispersion of the set of computed distances between points. Analogously, in principal component analysis, inertia corresponds to the explained variance of dimensions. The eigenvalue allows the inertia that a specific category produces to be quantified, determining a certain percentage relative to the entire set of the active category. The percentage coordinates (x- and y-axis) of the graph enable the category points in a graph to be represented and established. In MCA, the distance between 2 or more categories of different variables can be interpreted in terms of the associations and correlations between these. If 2 categories present high coordinates and are close in space, this means that they tend to be directly associated [<xref ref-type="bibr" rid="ref26">26</xref>,<xref ref-type="bibr" rid="ref27">27</xref>]. If 2 categories present high coordinates but are distant from each other (eg, they have opposite signs), this means that they tend to be inversely associated [<xref ref-type="bibr" rid="ref28">28</xref>,<xref ref-type="bibr" rid="ref29">29</xref>]. A heatmap was created to help the interpretation of the MCA. This plot used the intensity of the colors to show the level of association between the variables. Our graphs showed the association by the distance between the categories in the MCA plot.</p>
      </sec>
      <sec>
        <title>K-means</title>
        <p>K-means [<xref ref-type="bibr" rid="ref30">30</xref>] is an unsupervised learning algorithm used in data mining and pattern recognition. The algorithm partitions the data set into <italic>k</italic> predefined distinct nonoverlapping subgroups (clusters) where each data point belongs to only one group. It tries to make the intracluster data points as similar as possible while also keeping the clusters as different (far) as possible. It assigns data points to a cluster such that the sum of the squared distance between the data points and the cluster’s centroid is at a minimum. The less variation we have within clusters, the more homogeneity (similarity) there is between the data points within the same cluster. The k-means algorithm is composed of the following steps: (1) it places <italic>k</italic> points in the space represented by the patients who are being clustered, (2) it assigns each patient to the group that has the closest centroid, and (3) when all patients have been assigned, it recalculates the positions of the <italic>k</italic> centroids. Steps 2 and 3 are repeated until the centroids no longer move. This produces a separation of the patients into homogeneous groups while maximizing heterogeneity across groups. The optimal number of clusters was obtained by the elbow method [<xref ref-type="bibr" rid="ref31">31</xref>]. This consists of plotting the explained variation as a function of the number of clusters and picking the elbow of the curve as the number of groups to use. To assess internal cluster quality, cluster stability of the optimal solution was computed using Jaccard bootstrap values with 10,000 runs [<xref ref-type="bibr" rid="ref32">32</xref>].</p>
      </sec>
      <sec>
        <title>Statistical Analysis</title>
        <p>All the information presented was analyzed using MCA, an extension of CA, and the k-means algorithm. The combination of MCA and k-means benefits the effectiveness of the calculation process and, in consequence, the k-means results. MCA helps to reduce the noise, which allows the k-means algorithm to obtain more accurate distances. The MCA dimension reduction automatically performs data clustering according to the k-means objective function [<xref ref-type="bibr" rid="ref33">33</xref>]. In addition, the potential confounding factors in this study were assessed by calculating the distances between the variables (inertia) that take into account their relative weight in the database as a whole. However, these variables were related to each other depending on the similarity of each register. Previously, the patients with empty fields were removed.</p>
        <p>The MCA method was implemented in scripts performed with R [<xref ref-type="bibr" rid="ref34">34</xref>], an open-source programming language and environment for statistical computing and graphics. Specifically, the main library used to implement the methods and obtain the results was FactoMineR [<xref ref-type="bibr" rid="ref35">35</xref>]. K-means was written in Python [<xref ref-type="bibr" rid="ref36">36</xref>], and the main library used was scikit-learn [<xref ref-type="bibr" rid="ref37">37</xref>]. These methods were launched by their default configuration and using a personal computer.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>MCA and K-means Without the Tumor Staging</title>
        <p>The analysis of the MCA and k-means without the stage variable included 1083 registers. <xref rid="figure2" ref-type="fig">Figure 2</xref> shows the different categories and their possible associations. The variance for dimension 1 was 15% (eigenvalue 0.21) and that for dimension 2 was 12% (eigenvalue 0.17). <xref rid="figure2" ref-type="fig">Figure 2</xref> also shows the position of each category in the plot and its contribution on the dimensions. Note the contribution of mortality (15% on the negative x-axis and 10.2% on the positive y-axis), the ≥75 years age group (18.8% on the negative x-axis and 4.5% on the positive y-axis), and the ex-smoker/smoker (16.5% and 12.3% on positive x-y axis). <xref rid="figure3" ref-type="fig">Figure 3</xref> shows the relation between the categories. The associations between the points were significant when they were closer and the distance was minimum. For example, females and obesity were represented in the same dimension in the MCA plot. Therefore, the heatmap also demonstrated this association with a distance of 0.4 between the points in the MCA plot.</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>2D multiple correspondence analysis plot showing the correlations between the categories and their contributions for all data sets.</p>
          </caption>
          <graphic xlink:href="jmir_v24i7e29056_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>Correlations between the categories by the distance between them.</p>
          </caption>
          <graphic xlink:href="jmir_v24i7e29056_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>Graphically, the points closer to each other or the points represented in the same direction of the axis suggest associations. As can be seen, mortality and older age are very close in the plot. This suggests a possible association. Another possible relation observed could be between females and obesity. Then, a cloud on the positive x-axis and the negative y-axis was made up of the 65-74 years age group, high income, and survival. Finally, additional associations could be made up of the 50-64 years age group, males, smokers or ex-smokers, and normal weight.</p>
        <p><xref ref-type="table" rid="table2">Table 2</xref> shows the centroids of the main clusters obtained after applying the k-means algorithm. The recommended number of optimal clusters was 5 [<xref ref-type="bibr" rid="ref31">31</xref>] (see the GitHub [<xref ref-type="bibr" rid="ref19">19</xref>] repository to evaluate the plot). The first cluster grouped 242 registers among which the main register was males aged ≥75 years from urban populations, with low income, nonsmokers who were overweight, and with a low risk of dying. The next cluster (259 registers) represented females aged between 50 and 64 years with high income. It grouped the cases from rural populations with normal weight and survival. Cluster number 3 was made up of 180 registers. These were mostly males aged ≥75 years with low income and from semiurban populations. They were nonsmokers but were obese and unfortunately included exitus. It was the only cluster that included mortality. The fourth cluster represented urban males aged between 65 and 74 years and with low income. In this case, they were smokers or ex-smokers with normal weight and no mortality. It contained 194 registers. Finally, the last cluster was made up of 208 cases, which included semiurban females aged between 65 and 74 years with low income. They were not smokers but they were overweight. Fortunately, surviving patients predominated in this cluster and the risk of dying was low. See these clusters represented graphically in the GitHub [<xref ref-type="bibr" rid="ref19">19</xref>].</p>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Centroids of the main clusters obtained from the k-means algorithm for all data sets.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="190"/>
            <col width="210"/>
            <col width="190"/>
            <col width="210"/>
            <col width="200"/>
            <thead>
              <tr valign="top">
                <td>Cluster 1</td>
                <td>Cluster 2</td>
                <td>Cluster 3</td>
                <td>Cluster 4</td>
                <td>Cluster 5</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Urban</td>
                <td>Rural</td>
                <td>Semiurban</td>
                <td>Urban</td>
                <td>Semiurban</td>
              </tr>
              <tr valign="top">
                <td>Age ≥75 years</td>
                <td>Age 50-64 years</td>
                <td>Age ≥75 years</td>
                <td>Age 65-74 years</td>
                <td>Age 65-74 years</td>
              </tr>
              <tr valign="top">
                <td>Low income</td>
                <td>High income</td>
                <td>Low income</td>
                <td>Low income</td>
                <td>Low income</td>
              </tr>
              <tr valign="top">
                <td>Male</td>
                <td>Female</td>
                <td>Male</td>
                <td>Male</td>
                <td>Female</td>
              </tr>
              <tr valign="top">
                <td>Nonsmoker</td>
                <td>Nonsmoker</td>
                <td>Nonsmoker</td>
                <td>Smoker/Ex-smoker</td>
                <td>Nonsmoker</td>
              </tr>
              <tr valign="top">
                <td>Overweight</td>
                <td>Normal weight</td>
                <td>Obesity</td>
                <td>Normal weight</td>
                <td>Overweight</td>
              </tr>
              <tr valign="top">
                <td>Alive</td>
                <td>Alive</td>
                <td>Death</td>
                <td>Alive</td>
                <td>Alive</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
      <sec>
        <title>MCA and K-means Including the Tumor Staging</title>
        <p>This subsection presents the outcomes, including the stage of the tumor. The data set used for this analysis discarded the registers that did not contain the stage (647 registers). Therefore, the number of cases analyzed was 438 (<xref ref-type="table" rid="table1">Table 1</xref>). <xref rid="figure4" ref-type="fig">Figure 4</xref> shows the outcomes obtained after applying MCA. The variance of dimension 1 was 11.4% (eigenvalue 0.18) and that of dimension 2 was 10.2% (eigenvalue 0.16). Mortality was also one of those with the highest contribution (26.4% on the positive x-axis and 10.5% on the positive y-axis). Near this was stage III with a high contribution (16.3% on the positive x-axis and 13.7% on the positive y-axis). Ex-smoker/smoker contributed significantly compared with the rest of categories (9.1% on the negative x-axis and 1.3% on the positive y-axis). The relations between these and other categories are shown in <xref rid="figure5" ref-type="fig">Figure 5</xref>. Note the correlation between death and stage III. The heatmap differentiated this association clearly, as the MCA plot also showed. The location of the categories in the plot and their contributions suggested possible associations. The main association was between stage III and mortality and with females with stage II, the ≥75 years age group, and nonsmokers. Another relation could be males with high income, aged between 50 and 64 years, stage 0, and ex-smokers or smokers. However, these results could be affected by the decrease in cases.</p>
        <fig id="figure4" position="float">
          <label>Figure 4</label>
          <caption>
            <p>2D multiple correspondence analysis plot showing the correlations between the categories and their contributions.</p>
          </caption>
          <graphic xlink:href="jmir_v24i7e29056_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure5" position="float">
          <label>Figure 5</label>
          <caption>
            <p>Correlations between the categories by the distance between them including the tumor staging.</p>
          </caption>
          <graphic xlink:href="jmir_v24i7e29056_fig5.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p><xref ref-type="table" rid="table3">Table 3</xref> shows the clusters obtained from the data set with the tumor stage. All the clusters obtained were male nonsmokers owing to the decrease in the number of registers in the data set. The first cluster with 135 cases represented obese urban patients aged between 65 and 74 years with stage II reached and a low risk of death. The second cluster had 120 registers of patients with stage II and age ≥75 years from semiurban populations. Their risk of death was also low. The next cluster included 76 registers and they were from the urban population but overweight. They included the younger patients (50-64 years age group), with a low risk of death and the lowest stage (stage 0). The fourth cluster (n=72) represented rural inhabitants, aged between 65 and 74 years. They were obese with stage III cancer but low risk of death. However, the fifth cluster was patients from the semiurban population, aged ≥75 years, overweight, in an advanced stage (III), and with a high risk of death. See these clusters represented graphically in the k-means folder of GitHub [<xref ref-type="bibr" rid="ref19">19</xref>].</p>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Centroids of the main clusters obtained from the k-means algorithm: the final data set after including the stage of the tumor.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="200"/>
            <col width="200"/>
            <col width="200"/>
            <col width="200"/>
            <col width="200"/>
            <thead>
              <tr valign="top">
                <td>Cluster 1</td>
                <td>Cluster 2</td>
                <td>Cluster 3</td>
                <td>Cluster 4</td>
                <td>Cluster 5</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Urban</td>
                <td>Semiurban</td>
                <td>Urban</td>
                <td>Rural</td>
                <td>Semiurban</td>
              </tr>
              <tr valign="top">
                <td>Age 65-74 years</td>
                <td>Age ≥75 years</td>
                <td>Age 50-64 years</td>
                <td>Age 65-74 years</td>
                <td>Age ≥75 years</td>
              </tr>
              <tr valign="top">
                <td>High income</td>
                <td>Low income</td>
                <td>Low income</td>
                <td>Low income</td>
                <td>Low income</td>
              </tr>
              <tr valign="top">
                <td>Male</td>
                <td>Male</td>
                <td>Male</td>
                <td>Male</td>
                <td>Male</td>
              </tr>
              <tr valign="top">
                <td>Nonsmoker</td>
                <td>Nonsmoker</td>
                <td>Nonsmoker</td>
                <td>Nonsmoker</td>
                <td>Nonsmoker</td>
              </tr>
              <tr valign="top">
                <td>Obesity</td>
                <td>Obesity</td>
                <td>Overweight</td>
                <td>Obesity</td>
                <td>Overweight</td>
              </tr>
              <tr valign="top">
                <td>Alive</td>
                <td>Alive</td>
                <td>Alive</td>
                <td>Alive</td>
                <td>Death</td>
              </tr>
              <tr valign="top">
                <td>Stage II</td>
                <td>Stage II</td>
                <td>Stage 0</td>
                <td>Stage III</td>
                <td>Stage III</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <p>The MCA technique and the k-means algorithm permit the analysis and detection of clusters of patients with similar risk factors and outcomes not observed in the literature. The population-based cancer registry for the province of Lleida registered 1083 colorectal cancers between 2012 and 2015. This cancer is the most incident in our region [<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref17">17</xref>,<xref ref-type="bibr" rid="ref18">18</xref>] and by applying MCA and k-means, some relationships were found between some aspects that corroborate the usefulness of these techniques. They helped to detect that in colorectal cancer, the age group and BMI risk factors are related. Another important corroboration was the risk of death in older people (≥75 years age group) either obese or overweight and in an advanced stage. Related to this latter factor, the advanced stage was observed in older people with obesity. Stages II and III were 65% (119/181) of the total in the ≥75 years age group.</p>
      <p>Previous studies have used clustering techniques to detect associations, but none of them were used for associating patient profiles with risk factors. We based our study on a preliminary paper [<xref ref-type="bibr" rid="ref38">38</xref>], which evaluated the relationship between air pollution, particulate matter components, and risk of breast cancer in a United States–wide prospective cohort by using a clustering technique. That study concluded that air pollution measures were related to both invasive breast cancer and ductal carcinoma in situ within certain geographic regions. Another starting point was the study presented in [<xref ref-type="bibr" rid="ref39">39</xref>], which used the combination of MCA and k-means to ascertain multimorbidity patterns. That study concluded that these techniques could help to identify these patterns. Another study our work was based on is the one presented in [<xref ref-type="bibr" rid="ref40">40</xref>], which studied the trends in the incidence of cancers associated with being overweight and obese. Another study [<xref ref-type="bibr" rid="ref41">41</xref>] analyzed the possible relation between obesity and colorectal cancer. These papers studied the impact of the risk factors on colorectal cancer but did not use the MCA technique or k-means algorithm to explore associations between these and their impact. In addition, a previous study used MCA to analyze the prognosis in surgery for low rectal cancer [<xref ref-type="bibr" rid="ref42">42</xref>]. Another study used k-means to search patterns in patients with colorectal cancer, but its main aim was to detect emotion regulation patterns and personal resilience [<xref ref-type="bibr" rid="ref43">43</xref>]. However, to the best of our knowledge, no prior studies have used MCA or k-means to link types of risk factors, SES, tumor stage, and patients’ characteristics in cases of colorectal cancer.</p>
      <p>One MCA outcome was the inertia (27%). Further, various variables had high contributions. A strong relation was obtained between older patients (≥75 years age group) and mortality. This may suggest an increase in the risk of mortality for colorectal cancer in older adults, as previous studies showed [<xref ref-type="bibr" rid="ref44">44</xref>]. On the opposite side of previous associations, it showed another association between survival, high SES, and the 65-74 years age group. Even though the contributions of these are lower than those of mortality and the older population, it is suggested that the risk of death is lower in people with high SES [<xref ref-type="bibr" rid="ref45">45</xref>] and among younger people. An association was detected between females and obesity although this was not reflected in the k-means. This relation may be because 37% (146/394) of all the women were obese. However, obese men represented 29% (205/689) of the male population, and the percentage of obesity in the data set was 31% (343/1083). This relation suggests that obese women could more likely develop colorectal cancer than men. In general, the probability of colorectal cancer in obese patients can increase by 30%-70% [<xref ref-type="bibr" rid="ref46">46</xref>]. However, although the contribution is too low to establish a strong relation, the position of males and normal weight in the plot might suggest that there may be some other factors that increase the risk of this cancer and that these techniques highlighted other associations. Some additional patient clinical history would be necessary.</p>
      <p>Regarding the k-means analysis, the third cluster confirmed the mortality in the older population with obesity [<xref ref-type="bibr" rid="ref44">44</xref>]. The first cluster also represented the ≥75 years age group but who were overweight and had no exitus. These differences between clusters suggested that obesity may be a determining factor in older persons that increases the risk of death. In addition, these 2 clusters were males. Similar outcomes were obtained in the fifth cluster when the tumor stage was added. Stage III was directly related with the ≥75 years age group, the semiurban population, and mortality, thereby suggesting that for older persons, being overweight or obese and in an advanced stage could increase the risk of death. The fourth cluster was made up of smokers or ex-smokers. Although tobacco is not usually directly related with colorectal cancer, some studies also support this result [<xref ref-type="bibr" rid="ref47">47</xref>,<xref ref-type="bibr" rid="ref48">48</xref>].</p>
      <p>The analysis then studied the data set filtered by tumor stage. The final data set was made up of 438 registers. The MCA technique obtained a significant relation between stage III and mortality. However, screening programs and technology decrease this risk, as recent studies concluded [<xref ref-type="bibr" rid="ref49">49</xref>]. We can also see that stage 0 was related with younger people (50-64 years age group). The k-means results gave similar conclusions as in the MCA. The younger people, stage 0, and survival appeared in the same cluster as demonstrated in the previous k-means analysis with the second cluster. This suggests the importance of screening programs to detect tumors at an early stage [<xref ref-type="bibr" rid="ref50">50</xref>]. The fourth cluster in the second analysis related rural and stage III. This association may insinuate a possible delay in diagnosis or difficulties in accessing the health care system and mass screening testing in rural areas [<xref ref-type="bibr" rid="ref51">51</xref>]. Finally, note that all clusters that had stage II or III also included obesity or excess weight. This may suggest that the BMI may be a determinant for having an aggressive colorectal tumor. However, no significant outcomes related to income were obtained, although 80% (863/1083) of the cases were low-income patients. This high percentage of low-income cases could be explained by the fact that the average annual net income per person in Catalonia in 2015 was €12,283 [<xref ref-type="bibr" rid="ref52">52</xref>].</p>
      <p>The strengths of using the MCA and k-means cluster analysis are that the results are less susceptible to outliers in the data, the influence of chosen distance measures, or the inclusion of inappropriate or irrelevant variables [<xref ref-type="bibr" rid="ref53">53</xref>]. This study had some limitations that should be noted. Regarding the techniques, it tends to take into account the relative weight of each variable concerning the set of study variables and allows control for potential confounding factors such as sex, age, and survival. However, some residual confounding effects cannot be ruled out. Further, these include the low number of cases with tumor stage (438/1083, 40% of total). In consequence, the final data set also made it difficult to analyze the strength of the causal relationship between different prediction parameters and outcomes because it contained few registers. The postal address registered for each case was the patient’s home address at the time of cancer diagnosis. However, this address may have changed during the study. Despite this, the number of cases with changed addresses would be very low and this factor is not expected to produce bias in the results. Some lifestyle aspects such as alcohol consumption, diabetes, or profession were not considered. The lack of cause of death is another limitation. The results showed that there is room for other kinds of risk factors. Additional patient clinical history would be required in order to find these. Further, related to the comorbidities, the Charlson index could not be added because approximately only 15% of the sample received it. A future study may be the study of the causality, adding synthetic data to enlarge the data set. Finally, some associations could hide others due to these techniques even though they showed the most significant relationships. In addition, the genetic and hereditary conditions were not considered.</p>
      <p>In conclusion, many studies demonstrate that some risk factors such as BMI, tobacco smoking, or SES could influence the incidence of colorectal cancer by using traditional techniques. This study used new techniques such as MCA and k-means to analyze the relationships between colorectal cancer and risk factors. The outcomes obtained demonstrated that the combination of these techniques could help to detect relations between risk factors and patient characteristics. Obesity and being overweight in the older population (≥75 years age group) increase the risk of developing aggressive tumors and death. Stage 0 was related with younger people and survival. This highlights the importance of screening programs for colorectal cancer. The presence of tobacco in a cluster indicated that it must be considered as a risk factor in colorectal cancer. The results of our study help to corroborate suspected trends in several of the relationships detected and confirm the usefulness of these techniques. Further, they encourage applying these methods to other cancers and detecting how the risk factors could be associated. In future work, it is important to delve deeper into the patients’ characteristics and risk factors. This means including new variables such as diabetes, alcoholism, or the cause of death. The findings obtained in this study motivate us to search for relations between risk factors in other cancers. Moreover, new techniques and artificial intelligence algorithms can be implemented to explore patterns of pretumor and posttumor detection from the clinical history.</p>
    </sec>
  </body>
  <back>
    <app-group/>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">CA</term>
          <def>
            <p>correspondence analysis</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">MCA</term>
          <def>
            <p>multiple correspondence analysis</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">SES</term>
          <def>
            <p>socioeconomic status</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>This work was supported by contract 2019-DI-43 from the Industrial Doctorate Program of the Government of Catalonia and by the Spanish Ministry of Science and Innovation under contract PID2020-113614RB-C22. Some of the authors are members of the research group 2014-SGR163, funded by the Generalitat de Catalunya.</p>
    </ack>
    <fn-group>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ferlay</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Soerjomataram</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Dikshit</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Eser</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Mathers</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Rebelo</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Parkin</surname>
              <given-names>DM</given-names>
            </name>
            <name name-style="western">
              <surname>Forman</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Bray</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Cancer incidence and mortality worldwide: sources, methods and major patterns in GLOBOCAN 2012</article-title>
          <source>Int J Cancer</source>
          <year>2015</year>
          <month>03</month>
          <day>01</day>
          <volume>136</volume>
          <issue>5</issue>
          <fpage>E359</fpage>
          <lpage>86</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1002/ijc.29210"/>
          </comment>
          <pub-id pub-id-type="doi">10.1002/ijc.29210</pub-id>
          <pub-id pub-id-type="medline">25220842</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ferlay</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Colombet</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Soerjomataram</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Dyba</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Randi</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Bettio</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Gavin</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Visser</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Bray</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Cancer incidence and mortality patterns in Europe: Estimates for 40 countries and 25 major cancers in 2018</article-title>
          <source>Eur J Cancer</source>
          <year>2018</year>
          <month>11</month>
          <volume>103</volume>
          <fpage>356</fpage>
          <lpage>387</lpage>
          <pub-id pub-id-type="doi">10.1016/j.ejca.2018.07.005</pub-id>
          <pub-id pub-id-type="medline">30100160</pub-id>
          <pub-id pub-id-type="pii">S0959-8049(18)30955-9</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Labianca</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Beretta</surname>
              <given-names>GD</given-names>
            </name>
            <name name-style="western">
              <surname>Kildani</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Milesi</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Merlin</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Mosconi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Pessi</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Prochilo</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Quadri</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Gatta</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>de Braud</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Wils</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Colon cancer</article-title>
          <source>Crit Rev Oncol Hematol</source>
          <year>2010</year>
          <month>05</month>
          <volume>74</volume>
          <issue>2</issue>
          <fpage>106</fpage>
          <lpage>33</lpage>
          <pub-id pub-id-type="doi">10.1016/j.critrevonc.2010.01.010</pub-id>
          <pub-id pub-id-type="medline">20138539</pub-id>
          <pub-id pub-id-type="pii">S1040-8428(10)00011-9</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Parkin</surname>
              <given-names>DM</given-names>
            </name>
          </person-group>
          <article-title>The evolution of the population-based cancer registry</article-title>
          <source>Nat Rev Cancer</source>
          <year>2006</year>
          <month>08</month>
          <volume>6</volume>
          <issue>8</issue>
          <fpage>603</fpage>
          <lpage>12</lpage>
          <pub-id pub-id-type="doi">10.1038/nrc1948</pub-id>
          <pub-id pub-id-type="medline">16862191</pub-id>
          <pub-id pub-id-type="pii">nrc1948</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Florensa</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Pedrol</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Mòdol</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Farré</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Salud</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Mateo</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Godoy</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Resultats de l'any 2014</article-title>
          <source>Butlletí Epidemiològic Catalunya 2020</source>
          <year>2020</year>
          <volume>40</volume>
          <issue>12</issue>
          <fpage>252</fpage>
          <lpage>264</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Florensa</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Godoy</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Mateo</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Solsona</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Pedrol</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Mesas</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Pinol</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>The Use of Multiple Correspondence Analysis to Explore Associations Between Categories of Qualitative Variables and Cancer Incidence</article-title>
          <source>IEEE J Biomed Health Inform</source>
          <year>2021</year>
          <month>09</month>
          <volume>25</volume>
          <issue>9</issue>
          <fpage>3659</fpage>
          <lpage>3667</lpage>
          <pub-id pub-id-type="doi">10.1109/JBHI.2021.3073605</pub-id>
          <pub-id pub-id-type="medline">33857006</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bjørge</surname>
              <given-names>Tone</given-names>
            </name>
            <name name-style="western">
              <surname>Häggström</surname>
              <given-names>Christel</given-names>
            </name>
            <name name-style="western">
              <surname>Ghaderi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Nagel</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Manjer</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Tretli</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Ulmer</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Harlid</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Rosendahl</surname>
              <given-names>AH</given-names>
            </name>
            <name name-style="western">
              <surname>Lang</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Stattin</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Stocks</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Engeland</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>BMI and weight changes and risk of obesity-related cancers: a pooled European cohort study</article-title>
          <source>Int J Epidemiol</source>
          <year>2019</year>
          <month>12</month>
          <day>01</day>
          <volume>48</volume>
          <issue>6</issue>
          <fpage>1872</fpage>
          <lpage>1885</lpage>
          <pub-id pub-id-type="doi">10.1093/ije/dyz188</pub-id>
          <pub-id pub-id-type="medline">31566221</pub-id>
          <pub-id pub-id-type="pii">5576152</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Guo</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Su</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Ren</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>He</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Dai</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>[Body mass index and cancer incidence: a prospective cohort study in northern China]</article-title>
          <source>Zhonghua Liu Xing Bing Xue Za Zhi</source>
          <year>2014</year>
          <month>03</month>
          <volume>35</volume>
          <issue>3</issue>
          <fpage>231</fpage>
          <lpage>6</lpage>
          <pub-id pub-id-type="medline">24831616</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mizoue</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Inoue</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Tanaka</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Tsuji</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Wakai</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Nagata</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Tsugane</surname>
              <given-names>S</given-names>
            </name>
            <collab>Research Group for the Development and Evaluation of Cancer Prevention Strategies in Japan</collab>
          </person-group>
          <article-title>Tobacco smoking and colorectal cancer risk: an evaluation based on a systematic review of epidemiologic evidence among the Japanese population</article-title>
          <source>Jpn J Clin Oncol</source>
          <year>2006</year>
          <month>01</month>
          <volume>36</volume>
          <issue>1</issue>
          <fpage>25</fpage>
          <lpage>39</lpage>
          <pub-id pub-id-type="doi">10.1093/jjco/hyi207</pub-id>
          <pub-id pub-id-type="medline">16423841</pub-id>
          <pub-id pub-id-type="pii">hyi207</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Masyn</surname>
              <given-names>KE</given-names>
            </name>
            <name name-style="western">
              <surname>Kawachi</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Laden</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Colditz</surname>
              <given-names>GA</given-names>
            </name>
          </person-group>
          <article-title>Neighborhood socioeconomic status and behavioral pathways to risks of colon and rectal cancer in women</article-title>
          <source>Cancer</source>
          <year>2010</year>
          <month>09</month>
          <day>01</day>
          <volume>116</volume>
          <issue>17</issue>
          <fpage>4187</fpage>
          <lpage>96</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1002/cncr.25195"/>
          </comment>
          <pub-id pub-id-type="doi">10.1002/cncr.25195</pub-id>
          <pub-id pub-id-type="medline">20544839</pub-id>
          <pub-id pub-id-type="pmcid">PMC2962923</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ugurlu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Cicek</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>Analysis and assessment of ship collision accidents using Fault Tree and Multiple Correspondence Analysis</article-title>
          <source>Ocean Engineering</source>
          <year>2022</year>
          <month>02</month>
          <volume>245</volume>
          <fpage>110514</fpage>
          <pub-id pub-id-type="doi">10.1016/j.oceaneng.2021.110514</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rustam</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Hartini</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Yunus</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Pratama</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Yunus</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Hidayat</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Analysis of Architecture Combining Convolutional Neural Network (CNN) and Kernel K-Means Clustering for Lung Cancer Diagnosis</article-title>
          <source>Int J Adv Sci Eng Inf Technol</source>
          <year>2020</year>
          <volume>10</volume>
          <issue>3</issue>
          <fpage>1200</fpage>
          <lpage>1206</lpage>
          <pub-id pub-id-type="doi">10.18517/ijaseit.10.3.12113</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ronen</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Hayat</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Akalin</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Evaluation of colorectal cancer subtypes and cell lines using deep learning</article-title>
          <source>Life Sci Alliance</source>
          <year>2019</year>
          <month>12</month>
          <volume>2</volume>
          <issue>6</issue>
          <fpage>e201900517</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.26508/lsa.201900517"/>
          </comment>
          <pub-id pub-id-type="doi">10.26508/lsa.201900517</pub-id>
          <pub-id pub-id-type="medline">31792061</pub-id>
          <pub-id pub-id-type="pii">2/6/e201900517</pub-id>
          <pub-id pub-id-type="pmcid">PMC6892438</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="web">
          <article-title>eCAP</article-title>
          <source>Departament de Salut</source>
          <access-date>2020-12-16</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://salutweb.gencat.cat/ca/ambits_actuacio/linies_dactuacio/tic/sistemes-informacio/gestio-assistencial/ecap/">https://salutweb.gencat.cat/ca/ambits_actuacio/linies_dactuacio/tic/sistemes-informacio/gestio-assistencial/ecap/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="web">
          <article-title>Registre central de població del CatSalut</article-title>
          <source>Català de la Salut</source>
          <access-date>2020-12-16</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://catsalut.gencat.cat/ca/proveidors-professionals/registres-catalegs/registres/central-poblacio/index.html">https://catsalut.gencat.cat/ca/proveidors-professionals/registres-catalegs/registres/central-poblacio/index.html</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="web">
          <source>International Agency for Research on Cancer</source>
          <access-date>2020-12-14</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.iacr.com.fr/index.php?option=com_content&#38;view=article&#38;id=72:iarccrgtools&#38;catid=68&#38;Itemid=445">http://www.iacr.com.fr/index.php?option=com_content&#38;view=article&#38;id=72:iarccrgtools&#38;catid=68&#38;Itemid=445</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Godoy</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Pedrol</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Mòdol</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Salud</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>El Registre poblacional de càncer a Lleida: resultats i perspectives</article-title>
          <source>Butlletí Epidemiològic Catalunya</source>
          <year>2016</year>
          <access-date>2022-07-06</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://scientiasalut.gencat.cat/handle/11351/3052?show=full&#38;locale-attribute=en">https://scientiasalut.gencat.cat/handle/11351/3052?show=full&#38;locale-attribute=en</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Godoy-Garcia</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Pedrol</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Mòdol-Pena</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Salud</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>El registre poblacional de càncer a Lleida: resultats de l'any 2013</article-title>
          <source>Butlletí Epidemiològic Catalunya</source>
          <year>2018</year>
          <access-date>2022-07-06</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://scientiasalut.gencat.cat/handle/11351/3665?show=full">https://scientiasalut.gencat.cat/handle/11351/3665?show=full</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Florensa</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Godoy</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Solsona</surname>
              <given-names>F</given-names>
            </name>
            <collab>et al</collab>
          </person-group>
          <article-title>The use of multiple correspondence analysis to explore associations between categories of qualitative variables and cancer incidence</article-title>
          <source>Github Repository</source>
          <access-date>2021-01-01</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://github.com/didacflorensa/MCA-Cancer">https://github.com/didacflorensa/MCA-Cancer</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Weisell</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Body mass index as an indicator of obesity</article-title>
          <source>Asia Pac J Clin Nutr</source>
          <year>2002</year>
          <volume>11</volume>
          <issue>8</issue>
          <fpage>681</fpage>
          <lpage>684</lpage>
          <pub-id pub-id-type="doi">10.1046/j.1440-6047.11.s8.5.x</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="web">
          <article-title>España, Real Decreto-ley 16/2012, de 20 de abril, de medidas urgentes para garantizar la sostenibilidad del Sistema Nacional de Salud y mejorar la calidad y seguridad de sus prestaciones</article-title>
          <source>Boletín Oficial del Estado</source>
          <access-date>2022-07-06</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.boe.es/diario_boe/txt.php?id=BOE-A-2012-5403">https://www.boe.es/diario_boe/txt.php?id=BOE-A-2012-5403</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>García</surname>
              <given-names>GJ</given-names>
            </name>
          </person-group>
          <source>La Población Rural de España: De los Desequilibrios a la Sostenibilidad Social</source>
          <year>2013</year>
          <publisher-loc>Barcelona, Spain</publisher-loc>
          <publisher-name>Fundación La Caixa</publisher-name>
          <fpage>146</fpage>
          <lpage>9</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Murtagh</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Multiple correspondence analysis and related methods</article-title>
          <source>Psychometrika</source>
          <year>2007</year>
          <month>3</month>
          <day>24</day>
          <volume>72</volume>
          <issue>2</issue>
          <fpage>275</fpage>
          <lpage>277</lpage>
          <pub-id pub-id-type="doi">10.1007/s11336-006-1579-x</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Husson</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Josse</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Multiple correspondence analysis</article-title>
          <source>Visualization and Verbalization of Data</source>
          <year>2014</year>
          <publisher-loc>Boca Raton, Florida</publisher-loc>
          <publisher-name>CRC Press</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sourial</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Wolfson</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Quail</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Fletcher</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Karunananthan</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Bandeen-Roche</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Béland</surname>
              <given-names>François</given-names>
            </name>
            <name name-style="western">
              <surname>Bergman</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Correspondence analysis is a useful tool to uncover the relationships among categorical variables</article-title>
          <source>J Clin Epidemiol</source>
          <year>2010</year>
          <month>06</month>
          <volume>63</volume>
          <issue>6</issue>
          <fpage>638</fpage>
          <lpage>646</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/19896800"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jclinepi.2009.08.008</pub-id>
          <pub-id pub-id-type="medline">19896800</pub-id>
          <pub-id pub-id-type="pii">S0895-4356(09)00237-6</pub-id>
          <pub-id pub-id-type="pmcid">PMC3718710</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Greenacre</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <source>Correspondence Analysis in Practice, Third Edition</source>
          <year>2017</year>
          <publisher-loc>Boca Raton, Florida</publisher-loc>
          <publisher-name>Chapman and Hall/CRC</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Le Roux</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Rouanet</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <source>Geometric Data Analysis: From Correspondence Analysis to Structured Data Analysis</source>
          <year>2005</year>
          <publisher-loc>New York</publisher-loc>
          <publisher-name>Springer</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Di Franco</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Multiple correspondence analysis: one only or several techniques?</article-title>
          <source>Qual Quant</source>
          <year>2015</year>
          <month>4</month>
          <day>21</day>
          <volume>50</volume>
          <issue>3</issue>
          <fpage>1299</fpage>
          <lpage>1315</lpage>
          <pub-id pub-id-type="doi">10.1007/s11135-015-0206-0</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Heckler</surname>
              <given-names>CE</given-names>
            </name>
          </person-group>
          <article-title>Applied Multivariate Statistical Analysis</article-title>
          <source>Technometrics</source>
          <year>2005</year>
          <month>11</month>
          <volume>47</volume>
          <issue>4</issue>
          <fpage>517</fpage>
          <lpage>517</lpage>
          <pub-id pub-id-type="doi">10.1198/tech.2005.s319</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Likas</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Vlassis</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Verbeek</surname>
              <given-names>JJ</given-names>
            </name>
          </person-group>
          <article-title>The global k-means clustering algorithm</article-title>
          <source>Pattern Recognition</source>
          <year>2003</year>
          <month>2</month>
          <volume>36</volume>
          <issue>2</issue>
          <fpage>451</fpage>
          <lpage>461</lpage>
          <pub-id pub-id-type="doi">10.1016/s0031-3203(02)00060-2</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bholowalia</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Kumar</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>EBK-Means: A Clustering Technique based on Elbow Method and K-Means in WSN</article-title>
          <source>Int J Comput Appl</source>
          <year>2014</year>
          <volume>105</volume>
          <fpage>17</fpage>
          <lpage>24</lpage>
          <pub-id pub-id-type="doi">10.5120/18405-9674</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hennig</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Cluster-wise assessment of cluster stability</article-title>
          <source>Computational Statistics &#38; Data Analysis</source>
          <year>2007</year>
          <month>9</month>
          <volume>52</volume>
          <issue>1</issue>
          <fpage>258</fpage>
          <lpage>271</lpage>
          <pub-id pub-id-type="doi">10.1016/j.csda.2006.11.025</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ding</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>He</surname>
              <given-names>X</given-names>
            </name>
          </person-group>
          <article-title>K-means clustering via principal component analysis</article-title>
          <year>2004</year>
          <conf-name>Proceedings of the 21st Int Conf Mach Learn (ICML)</conf-name>
          <conf-date>2004</conf-date>
          <conf-loc>Banff, Canada</conf-loc>
          <pub-id pub-id-type="doi">10.1145/1015330.1015408</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="web">
          <source>The R Project for Statistical Computing</source>
          <year>2019</year>
          <access-date>2020-12-16</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.r-project.org/">https://www.r-project.org/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lê</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Josse</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Husson</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>FactoMineR: An R Package for Multivariate Analysis</article-title>
          <source>J Stat Soft</source>
          <year>2008</year>
          <volume>25</volume>
          <issue>1</issue>
          <fpage>1</fpage>
          <lpage>18</lpage>
          <pub-id pub-id-type="doi">10.18637/jss.v025.i01</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="web">
          <source>Welcome to Python.org</source>
          <access-date>2020-12-16</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.python.org/">https://www.python.org/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="web">
          <source>scikit-learn: machine learning in Python</source>
          <access-date>2020-12-16</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://scikit-learn.org/stable/">https://scikit-learn.org/stable/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>White</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Keller</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Carroll</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Kaufman</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Sandler</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Air Pollution, Clustering of Particulate Matter Components, and Breast Cancer in the Sister Study: A U.S.-Wide Cohort</article-title>
          <source>Environ Health Perspect</source>
          <year>2019</year>
          <volume>127</volume>
          <issue>10</issue>
          <fpage>107002</fpage>
          <pub-id pub-id-type="doi">10.1289/ehp5131</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Violán</surname>
              <given-names>Concepción</given-names>
            </name>
            <name name-style="western">
              <surname>Roso-Llorach</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Foguet-Boreu</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Guisado-Clavero</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Pons-Vigués</surname>
              <given-names>Mariona</given-names>
            </name>
            <name name-style="western">
              <surname>Pujol-Ribera</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Valderas</surname>
              <given-names>JM</given-names>
            </name>
          </person-group>
          <article-title>Multimorbidity patterns with K-means nonhierarchical cluster analysis</article-title>
          <source>BMC Fam Pract</source>
          <year>2018</year>
          <month>07</month>
          <day>03</day>
          <volume>19</volume>
          <issue>1</issue>
          <fpage>108</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcfampract.biomedcentral.com/articles/10.1186/s12875-018-0790-x"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12875-018-0790-x</pub-id>
          <pub-id pub-id-type="medline">29969997</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12875-018-0790-x</pub-id>
          <pub-id pub-id-type="pmcid">PMC6031109</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Steele</surname>
              <given-names>CB</given-names>
            </name>
            <name name-style="western">
              <surname>Thomas</surname>
              <given-names>CC</given-names>
            </name>
            <name name-style="western">
              <surname>Henley</surname>
              <given-names>SJ</given-names>
            </name>
            <name name-style="western">
              <surname>Massetti</surname>
              <given-names>GM</given-names>
            </name>
            <name name-style="western">
              <surname>Galuska</surname>
              <given-names>DA</given-names>
            </name>
            <name name-style="western">
              <surname>Agurs-Collins</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Puckett</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Richardson</surname>
              <given-names>LC</given-names>
            </name>
          </person-group>
          <article-title>Vital Signs: Trends in Incidence of Cancers Associated with Overweight and Obesity - United States, 2005-2014</article-title>
          <source>MMWR Morb Mortal Wkly Rep</source>
          <year>2017</year>
          <month>10</month>
          <day>03</day>
          <volume>66</volume>
          <issue>39</issue>
          <fpage>1052</fpage>
          <lpage>1058</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.15585/mmwr.mm6639e1"/>
          </comment>
          <pub-id pub-id-type="doi">10.15585/mmwr.mm6639e1</pub-id>
          <pub-id pub-id-type="medline">28981482</pub-id>
          <pub-id pub-id-type="pmcid">PMC5720881</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Ng</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Zauber</surname>
              <given-names>AG</given-names>
            </name>
            <name name-style="western">
              <surname>Nguyen</surname>
              <given-names>LH</given-names>
            </name>
            <name name-style="western">
              <surname>Song</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>He</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Fuchs</surname>
              <given-names>CS</given-names>
            </name>
            <name name-style="western">
              <surname>Ogino</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Willett</surname>
              <given-names>WC</given-names>
            </name>
            <name name-style="western">
              <surname>Chan</surname>
              <given-names>AT</given-names>
            </name>
            <name name-style="western">
              <surname>Giovannucci</surname>
              <given-names>EL</given-names>
            </name>
            <name name-style="western">
              <surname>Cao</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Association of Obesity With Risk of Early-Onset Colorectal Cancer Among Women</article-title>
          <source>JAMA Oncol</source>
          <year>2019</year>
          <month>01</month>
          <day>01</day>
          <volume>5</volume>
          <issue>1</issue>
          <fpage>37</fpage>
          <lpage>44</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/30326010"/>
          </comment>
          <pub-id pub-id-type="doi">10.1001/jamaoncol.2018.4280</pub-id>
          <pub-id pub-id-type="medline">30326010</pub-id>
          <pub-id pub-id-type="pii">2705608</pub-id>
          <pub-id pub-id-type="pmcid">PMC6382547</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mancini</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Pattaro</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Diodoro</surname>
              <given-names>MG</given-names>
            </name>
            <name name-style="western">
              <surname>Sperduti</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Garufi</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Stigliano</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Perri</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Grazi</surname>
              <given-names>GL</given-names>
            </name>
            <name name-style="western">
              <surname>Cosimelli</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Tumor Regression Grade After Neoadjuvant Chemoradiation and Surgery for Low Rectal Cancer Evaluated by Multiple Correspondence Analysis: Ten Years as Minimum Follow-up</article-title>
          <source>Clin Colorectal Cancer</source>
          <year>2018</year>
          <month>03</month>
          <volume>17</volume>
          <issue>1</issue>
          <fpage>e13</fpage>
          <lpage>e19</lpage>
          <pub-id pub-id-type="doi">10.1016/j.clcc.2017.06.004</pub-id>
          <pub-id pub-id-type="medline">28865674</pub-id>
          <pub-id pub-id-type="pii">S1533-0028(16)30276-6</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Baziliansky</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Cohen</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Emotion Regulation Patterns among Colorectal Cancer Survivors: Clustering and Associations with Personal Coping Resources</article-title>
          <source>Behav Med</source>
          <year>2021</year>
          <volume>47</volume>
          <issue>3</issue>
          <fpage>214</fpage>
          <lpage>224</lpage>
          <pub-id pub-id-type="doi">10.1080/08964289.2020.1731674</pub-id>
          <pub-id pub-id-type="medline">32275195</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref44">
        <label>44</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Bi</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Meng</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Z</given-names>
            </name>
          </person-group>
          <article-title>Different trends in colorectal cancer mortality between age groups in China: an age-period-cohort and joinpoint analysis</article-title>
          <source>Public Health</source>
          <year>2019</year>
          <month>01</month>
          <volume>166</volume>
          <fpage>45</fpage>
          <lpage>52</lpage>
          <pub-id pub-id-type="doi">10.1016/j.puhe.2018.08.007</pub-id>
          <pub-id pub-id-type="medline">30447645</pub-id>
          <pub-id pub-id-type="pii">S0033-3506(18)30274-9</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref45">
        <label>45</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hastert</surname>
              <given-names>TA</given-names>
            </name>
            <name name-style="western">
              <surname>Beresford</surname>
              <given-names>SAA</given-names>
            </name>
            <name name-style="western">
              <surname>Sheppard</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>White</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Disparities in cancer incidence and mortality by area-level socioeconomic status: a multilevel analysis</article-title>
          <source>J Epidemiol Community Health</source>
          <year>2015</year>
          <month>03</month>
          <volume>69</volume>
          <issue>2</issue>
          <fpage>168</fpage>
          <lpage>76</lpage>
          <pub-id pub-id-type="doi">10.1136/jech-2014-204417</pub-id>
          <pub-id pub-id-type="medline">25288143</pub-id>
          <pub-id pub-id-type="pii">jech-2014-204417</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref46">
        <label>46</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bardou</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Barkun</surname>
              <given-names>AN</given-names>
            </name>
            <name name-style="western">
              <surname>Martel</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Obesity and colorectal cancer</article-title>
          <source>Gut</source>
          <year>2013</year>
          <month>06</month>
          <volume>62</volume>
          <issue>6</issue>
          <fpage>933</fpage>
          <lpage>47</lpage>
          <pub-id pub-id-type="doi">10.1136/gutjnl-2013-304701</pub-id>
          <pub-id pub-id-type="medline">23481261</pub-id>
          <pub-id pub-id-type="pii">gutjnl-2013-304701</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref47">
        <label>47</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rawla</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Sunkara</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Barsouk</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Epidemiology of colorectal cancer: incidence, mortality, survival, and risk factors</article-title>
          <source>Gastroenterology Review</source>
          <year>2019</year>
          <volume>14</volume>
          <issue>2</issue>
          <fpage>89</fpage>
          <lpage>103</lpage>
          <pub-id pub-id-type="doi">10.5114/pg.2018.81072</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref48">
        <label>48</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Botteri</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Iodice</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Bagnardi</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Raimondi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Lowenfels</surname>
              <given-names>AB</given-names>
            </name>
            <name name-style="western">
              <surname>Maisonneuve</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Smoking and colorectal cancer: a meta-analysis</article-title>
          <source>JAMA</source>
          <year>2008</year>
          <month>12</month>
          <day>17</day>
          <volume>300</volume>
          <issue>23</issue>
          <fpage>2765</fpage>
          <lpage>78</lpage>
          <pub-id pub-id-type="doi">10.1001/jama.2008.839</pub-id>
          <pub-id pub-id-type="medline">19088354</pub-id>
          <pub-id pub-id-type="pii">300/23/2765</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref49">
        <label>49</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Brouwer</surname>
              <given-names>NPM</given-names>
            </name>
            <name name-style="western">
              <surname>Bos</surname>
              <given-names>ACRK</given-names>
            </name>
            <name name-style="western">
              <surname>Lemmens</surname>
              <given-names>VEPP</given-names>
            </name>
            <name name-style="western">
              <surname>Tanis</surname>
              <given-names>PJ</given-names>
            </name>
            <name name-style="western">
              <surname>Hugen</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Nagtegaal</surname>
              <given-names>ID</given-names>
            </name>
            <name name-style="western">
              <surname>de Wilt</surname>
              <given-names>JHW</given-names>
            </name>
            <name name-style="western">
              <surname>Verhoeven</surname>
              <given-names>RHA</given-names>
            </name>
          </person-group>
          <article-title>An overview of 25 years of incidence, treatment and outcome of colorectal cancer patients</article-title>
          <source>Int J Cancer</source>
          <year>2018</year>
          <month>12</month>
          <day>01</day>
          <volume>143</volume>
          <issue>11</issue>
          <fpage>2758</fpage>
          <lpage>2766</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1002/ijc.31785"/>
          </comment>
          <pub-id pub-id-type="doi">10.1002/ijc.31785</pub-id>
          <pub-id pub-id-type="medline">30095162</pub-id>
          <pub-id pub-id-type="pmcid">PMC6282554</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref50">
        <label>50</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Brenner</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Chang-Claude</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Jansen</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Knebel</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Stock</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Hoffmeister</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Reduced risk of colorectal cancer up to 10 years after screening, surveillance, or diagnostic colonoscopy</article-title>
          <source>Gastroenterology</source>
          <year>2014</year>
          <month>03</month>
          <volume>146</volume>
          <issue>3</issue>
          <fpage>709</fpage>
          <lpage>17</lpage>
          <pub-id pub-id-type="doi">10.1053/j.gastro.2013.09.001</pub-id>
          <pub-id pub-id-type="medline">24012982</pub-id>
          <pub-id pub-id-type="pii">S0016-5085(13)01278-X</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref51">
        <label>51</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Henley</surname>
              <given-names>SJ</given-names>
            </name>
            <name name-style="western">
              <surname>Anderson</surname>
              <given-names>RN</given-names>
            </name>
            <name name-style="western">
              <surname>Thomas</surname>
              <given-names>CC</given-names>
            </name>
            <name name-style="western">
              <surname>Massetti</surname>
              <given-names>GM</given-names>
            </name>
            <name name-style="western">
              <surname>Peaker</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Richardson</surname>
              <given-names>LC</given-names>
            </name>
          </person-group>
          <article-title>Invasive Cancer Incidence, 2004-2013, and Deaths, 2006-2015, in Nonmetropolitan and Metropolitan Counties - United States</article-title>
          <source>MMWR Surveill Summ</source>
          <year>2017</year>
          <month>07</month>
          <day>07</day>
          <volume>66</volume>
          <issue>14</issue>
          <fpage>1</fpage>
          <lpage>13</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/28683054"/>
          </comment>
          <pub-id pub-id-type="doi">10.15585/mmwr.ss6614a1</pub-id>
          <pub-id pub-id-type="medline">28683054</pub-id>
          <pub-id pub-id-type="pmcid">PMC5879727</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref52">
        <label>52</label>
        <nlm-citation citation-type="web">
          <article-title>Average annual net income of households</article-title>
          <source>Statistical Institute of Catalonia</source>
          <year>2015</year>
          <access-date>2020-12-16</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.idescat.cat/pub/?id=aec&#38;n=414&#38;t=2015&#38;lang=en">https://www.idescat.cat/pub/?id=aec&#38;n=414&#38;t=2015&#38;lang=en</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref53">
        <label>53</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liao</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Kianifard</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Obi</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Arcona</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Cluster analysis and its application to healthcare claims data: a study of end-stage renal disease patients who initiated hemodialysis</article-title>
          <source>BMC Nephrol</source>
          <year>2016</year>
          <month>03</month>
          <day>02</day>
          <volume>17</volume>
          <fpage>25</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.biomedcentral.com/1471-2369/17/25"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12882-016-0238-2</pub-id>
          <pub-id pub-id-type="medline">26936756</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12882-016-0238-2</pub-id>
          <pub-id pub-id-type="pmcid">PMC4776444</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
