<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<?covid-19-tdm?>
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="letter" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMIR</journal-id>
      <journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id>
      <journal-title>Journal of Medical Internet Research</journal-title>
      <issn pub-type="epub">1438-8871</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v23i5e28666</article-id>
      <article-id pub-id-type="pmid">33989165</article-id>
      <article-id pub-id-type="doi">10.2196/28666</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Letter to the Editor</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Letter to the Editor</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Redundancy of Terms in Search Strategies. Comment on “Searching PubMed to Retrieve Publications on the COVID-19 Pandemic: Comparative Analysis of Search Strings”</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Derrick</surname>
            <given-names>Thomas</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Campos</surname>
            <given-names>Daniel Melo De Oliveira</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-4016-3018</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Fulco</surname>
            <given-names>Umberto Laino</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-4528-9878</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author" corresp="yes" equal-contrib="yes">
          <name name-style="western">
            <surname>Oliveira</surname>
            <given-names>Jonas Ivan Nobre</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Universidade Federal do Rio Grande do Norte</institution>
            <addr-line>Departamento de Biofísica e Farmacologia</addr-line>
            <addr-line>Natal, 59072-970</addr-line>
            <country>Brazil</country>
            <phone>55 8432153793</phone>
            <email>jonasivan@gmail.com</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-1646-921X</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Universidade Federal do Rio Grande do Norte</institution>
        <addr-line>Natal</addr-line>
        <country>Brazil</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Jonas Ivan Nobre Oliveira <email>jonasivan@gmail.com</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>5</month>
        <year>2021</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>28</day>
        <month>5</month>
        <year>2021</year>
      </pub-date>
      <volume>23</volume>
      <issue>5</issue>
      <elocation-id>e28666</elocation-id>
      <history>
        <date date-type="received">
          <day>10</day>
          <month>3</month>
          <year>2021</year>
        </date>
        <date date-type="accepted">
          <day>13</day>
          <month>5</month>
          <year>2021</year>
        </date>
      </history>
      <copyright-statement>©Daniel Melo De Oliveira Campos, Umberto Laino Fulco, Jonas Ivan Nobre Oliveira. Originally published in the Journal of Medical Internet Research (https://www.jmir.org), 28.05.2021.</copyright-statement>
      <copyright-year>2021</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research, is properly cited. The complete bibliographic information, a link to the original publication on https://www.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://www.jmir.org/2021/5/e28666" xlink:type="simple"/>
      <related-article related-article-type="commentary-article" id="v22i11e23449" ext-link-type="doi" xlink:href="10.2196/23449" vol="22" page="e23449" xlink:type="simple">https://www.jmir.org/2020/11/e23449/</related-article>
      <related-article related-article-type="commentary" id="v23i5e29507" ext-link-type="doi" xlink:href="10.2196/29507" vol="23" page="e29507" xlink:type="simple">https://www.jmir.org/2021/5/e29507/</related-article>
      <kwd-group>
        <kwd>coronavirus</kwd>
        <kwd>COVID-19</kwd>
        <kwd>pandemic</kwd>
        <kwd>scientific publishing</kwd>
        <kwd>PubMed</kwd>
        <kwd>literature searching</kwd>
        <kwd>research</kwd>
        <kwd>literature</kwd>
        <kwd>search</kwd>
        <kwd>performance</kwd>
        <kwd>search strategy</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <p>Recently, a very interesting study on the performance of different search strategies for COVID-19 records in PubMed was published in the <italic>Journal of Medical Internet Research</italic> [<xref ref-type="bibr" rid="ref1">1</xref>]. In this article, Lazarus et al compared the performance of PubMed’s one-click search option with both simpler and more complex search strings. Novice and expert searchers alike would do well to keep these findings in mind when searching. For instance, developing a search strategy for a review is a time-consuming endeavor, and energy spent on locating relevant controlled vocabulary and keywords can be undermined by errors in formatting, compilation, and translation of these terms. Unfortunately, the presence of these errors is extremely common even among published studies. Sampson and McGowan [<xref ref-type="bibr" rid="ref2">2</xref>] reviewed studies published in Cochrane and discovered that 90.5% of their sample had a search strategy that contained one or more errors. Some errors related to the identification of terms or term variants, whereas others pertained to the formatting and basic compilation of the terms. The latter category included Boolean errors (19%), incorrect line numbers (1.6%), the use of Medical Subject Headings (MeSH) and free text terms combined on the same line (20.6%), and the search strategy not being appropriately translated for other databases (20.6%).</p>
    <p>In 2018, a study with a random sample of 70 Cochrane Reviews found problems in the design of the search strategies in 73% of reviews, and 53% of these contained problems that could limit both the sensitivity and precision of the search [<xref ref-type="bibr" rid="ref3">3</xref>]. Recently, Salvador-Oliván et al (2019) [<xref ref-type="bibr" rid="ref4">4</xref>] evaluated the search strategies of 137 systematic reviews in PubMed to identify errors, analyze their impact on information retrieval, and propose solutions. The results of this study reveal that the percentage of search strategies that contain various types of errors is quite high (92.7%) and that 78.1% of these errors affect recall. Although a substantial proportion of the errors came from inadequate identification of terms, errors were also introduced at the formatting level, with an absence of field tags (21.2%) and lack or incorrect use of quotation marks (5.8%), Boolean operators (1.5%), and parentheses (5.1%) [<xref ref-type="bibr" rid="ref4">4</xref>].</p>
    <p>As is to be expected, some errors have graver effects on results than others. Errors that have no effect at all on the number of results include redundant terms and morphological repetition; these “search errors” do not negatively affect information retrieval with respect to either recall or precision.</p>
    <p>An example of redundancy is as follows: “2019 novel coronavirus disease”[tw] OR “2019 novel coronavirus infection”[tw] OR “2019-nCoV disease”[tw] OR “2019-nCoV infection”[tw] OR “COVID-19 pandemic”[tw] OR “COVID-19 virus disease”[tw] OR “COVID-19 virus infection”[tw] OR “COVID19”[tw] OR “SARS-CoV-2 infection”[tw] OR “coronavirus disease 2019”[tw] OR “coronavirus disease-19”[tw] OR “COVID-19”[tw]. Authors justify redundancy because the decision to include or exclude terms depends on the references retrieved, as the effect of the terms on the results is impossible to predict. However, it is known beforehand that the first 11 terms in a PubMed search can be easily discarded because using the 12th variation will cover all 11, so the other terms are unnecessary.</p>
    <p>In terms of the search process, tools pertaining to data mining have been developed to help librarians identify relevant terms. Some text-mining approaches have been documented by Stansfield et al [<xref ref-type="bibr" rid="ref5">5</xref>], including TFIDF, Termine, and BibExcel. Also recommended are librarian tools that often have a particular focus on the MeSH thesaurus, such as PubMed PubReMiner [<xref ref-type="bibr" rid="ref6">6</xref>] and Yale MeSH Analyzer [<xref ref-type="bibr" rid="ref7">7</xref>] for keywords and controlled vocabulary.</p>
    <p>Created and updated by the United States National Library of Medicine, MeSH vocabulary is used by the ClinicalTrials.gov registry to classify which diseases are studied by the trials registered in its database. This hierarchically organized terminology for indexing and cataloging of biomedical information is divided into four types of terms. The main terms are the “headings” (also known as MeSH headings or descriptors), which describe the subject of each article. Most of these are accompanied by a list of synonyms or very similar terms (known as entry terms). When performing a MEDLINE search via PubMed, entry terms are automatically translated into (ie, mapped to) the corresponding descriptors with a good degree of reliability. In this sense, we highlight the importance of using the controlled vocabulary “COVID-19” (unique id: C000657245) and “SARS-CoV-2” (unique id: D000086402) in PubMed searches focused on COVID-19–related studies, and not the set of terms (searches 1, 2, 3, 6, 7, and 8) analyzed by Lazarus and collaborators [<xref ref-type="bibr" rid="ref1">1</xref>].</p>
    <p>Redundant terms in a search strategy do not affect the retrieval of information; however, the principle of parsimony instructs us to eliminate that which is unnecessary. Applied to information retrieval, this principle prompts us to eliminate any terms or phrases from a search strategy that do not retrieve or provide new records, as they are thus unnecessary.</p>
  </body>
  <back>
    <app-group/>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">MeSH</term>
          <def>
            <p>Medical Subject Headings</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>This work was partially financed by the Brazilian Research Agencies CAPES (PNPD) and CNPq (Finance Code 001).</p>
    </ack>
    <fn-group>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lazarus</surname>
              <given-names>Jeffrey V</given-names>
            </name>
            <name name-style="western">
              <surname>Palayew</surname>
              <given-names>Adam</given-names>
            </name>
            <name name-style="western">
              <surname>Rasmussen</surname>
              <given-names>Lauge Neimann</given-names>
            </name>
            <name name-style="western">
              <surname>Andersen</surname>
              <given-names>Tue Helms</given-names>
            </name>
            <name name-style="western">
              <surname>Nicholson</surname>
              <given-names>Joey</given-names>
            </name>
            <name name-style="western">
              <surname>Norgaard</surname>
              <given-names>Ole</given-names>
            </name>
          </person-group>
          <article-title>Searching PubMed to retrieve publications on the COVID-19 pandemic: comparative analysis of search strings</article-title>
          <source>J Med Internet Res</source>
          <year>2020</year>
          <month>11</month>
          <day>26</day>
          <volume>22</volume>
          <issue>11</issue>
          <fpage>e23449</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2020/11/e23449/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/23449</pub-id>
          <pub-id pub-id-type="medline">33197230</pub-id>
          <pub-id pub-id-type="pii">v22i11e23449</pub-id>
          <pub-id pub-id-type="pmcid">PMC7695541</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sampson</surname>
              <given-names>Margaret</given-names>
            </name>
            <name name-style="western">
              <surname>McGowan</surname>
              <given-names>Jessie</given-names>
            </name>
          </person-group>
          <article-title>Errors in search strategies were identified by type and frequency</article-title>
          <source>J Clin Epidemiol</source>
          <year>2006</year>
          <month>10</month>
          <volume>59</volume>
          <issue>10</issue>
          <fpage>1057</fpage>
          <lpage>63</lpage>
          <pub-id pub-id-type="doi">10.1016/j.jclinepi.2006.01.007</pub-id>
          <pub-id pub-id-type="medline">16980145</pub-id>
          <pub-id pub-id-type="pii">S0895-4356(06)00043-6</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Franco</surname>
              <given-names>Juan Víctor Ariel</given-names>
            </name>
            <name name-style="western">
              <surname>Garrote</surname>
              <given-names>Virginia Laura</given-names>
            </name>
            <name name-style="western">
              <surname>Escobar Liquitay</surname>
              <given-names>Camila Micaela</given-names>
            </name>
            <name name-style="western">
              <surname>Vietto</surname>
              <given-names>Valeria</given-names>
            </name>
          </person-group>
          <article-title>Identification of problems in search strategies in Cochrane Reviews</article-title>
          <source>Res Synth Methods</source>
          <year>2018</year>
          <month>09</month>
          <volume>9</volume>
          <issue>3</issue>
          <fpage>408</fpage>
          <lpage>416</lpage>
          <pub-id pub-id-type="doi">10.1002/jrsm.1302</pub-id>
          <pub-id pub-id-type="medline">29761662</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Salvador-Oliván</surname>
              <given-names>JÁ</given-names>
            </name>
            <name name-style="western">
              <surname>Marco-Cuenca</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Arquero-Avilés</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Errors in search strategies used in systematic reviews and their effects on information retrieval</article-title>
          <source>J Med Libr Assoc</source>
          <year>2019</year>
          <month>04</month>
          <day>15</day>
          <volume>107</volume>
          <issue>2</issue>
          <fpage>210</fpage>
          <pub-id pub-id-type="doi">10.5195/jmla.2019.567</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Stansfield</surname>
              <given-names>Claire</given-names>
            </name>
            <name name-style="western">
              <surname>O'Mara-Eves</surname>
              <given-names>Alison</given-names>
            </name>
            <name name-style="western">
              <surname>Thomas</surname>
              <given-names>James</given-names>
            </name>
          </person-group>
          <article-title>Text mining for search term development in systematic reviewing: a discussion of some methods and challenges</article-title>
          <source>Res Synth Methods</source>
          <year>2017</year>
          <month>09</month>
          <volume>8</volume>
          <issue>3</issue>
          <fpage>355</fpage>
          <lpage>365</lpage>
          <pub-id pub-id-type="doi">10.1002/jrsm.1250</pub-id>
          <pub-id pub-id-type="medline">28660680</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Slater</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>PubMed PubReMiner</article-title>
          <source>J Can Health Libr Assoc</source>
          <year>2014</year>
          <month>07</month>
          <day>21</day>
          <volume>33</volume>
          <issue>2</issue>
          <fpage>106</fpage>
          <pub-id pub-id-type="doi">10.5596/c2012-014</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hocking</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Yale MeSH Analyzer</article-title>
          <source>J Can Health Libr Assoc</source>
          <year>2017</year>
          <month>12</month>
          <day>01</day>
          <volume>38</volume>
          <issue>3</issue>
          <pub-id pub-id-type="doi">10.29173/jchla/jabsc.v38i3.29336</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
