<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<?covid-19-tdm?>
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMIR</journal-id>
      <journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id>
      <journal-title>Journal of Medical Internet Research</journal-title>
      <issn pub-type="epub">1438-8871</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v23i9e30161</article-id>
      <article-id pub-id-type="pmid">34375298</article-id>
      <article-id pub-id-type="doi">10.2196/30161</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Information Retrieval in an Infodemic: The Case of COVID-19 Publications</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Basch</surname>
            <given-names>Corey</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Lotto</surname>
            <given-names>Matheus</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Guo</surname>
            <given-names>Lei</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" corresp="yes" equal-contrib="yes">
          <name name-style="western">
            <surname>Teodoro</surname>
            <given-names>Douglas</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff2" ref-type="aff">2</xref>
          <xref rid="aff3" ref-type="aff">3</xref>
          <address>
            <institution>Department of Radiology and Medical Informatics</institution>
            <institution>University of Geneva</institution>
            <addr-line>Campus Biotech G6-N3 - Chemin des Mines 9</addr-line>
            <addr-line>Geneva, 1202</addr-line>
            <country>Switzerland</country>
            <phone>41 022 379 0225</phone>
            <email>douglas.teodoro@unige.ch</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-6238-4503</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Ferdowsi</surname>
            <given-names>Sohrab</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-3768-6408</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Borissov</surname>
            <given-names>Nikolay</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff4" ref-type="aff">4</xref>
          <xref rid="aff5" ref-type="aff">5</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-8423-9873</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Kashani</surname>
            <given-names>Elham</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff6" ref-type="aff">6</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-4221-4926</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Vicente Alvarez</surname>
            <given-names>David</given-names>
          </name>
          <degrees>BSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-6319-3765</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author">
          <name name-style="western">
            <surname>Copara</surname>
            <given-names>Jenny</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff2" ref-type="aff">2</xref>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-1510-3331</ext-link>
        </contrib>
        <contrib id="contrib7" contrib-type="author">
          <name name-style="western">
            <surname>Gouareb</surname>
            <given-names>Racha</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-6611-2548</ext-link>
        </contrib>
        <contrib id="contrib8" contrib-type="author">
          <name name-style="western">
            <surname>Naderi</surname>
            <given-names>Nona</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-1272-7640</ext-link>
        </contrib>
        <contrib id="contrib9" contrib-type="author">
          <name name-style="western">
            <surname>Amini</surname>
            <given-names>Poorya</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff4" ref-type="aff">4</xref>
          <xref rid="aff5" ref-type="aff">5</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-9473-0172</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Geneva School of Business Administration</institution>
        <institution>HES-SO University of Applied Arts and Sciences of Western Switzerland</institution>
        <addr-line>Carouge</addr-line>
        <country>Switzerland</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Swiss Institute of Bioinformatics</institution>
        <addr-line>Lausanne</addr-line>
        <country>Switzerland</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Department of Radiology and Medical Informatics</institution>
        <institution>University of Geneva</institution>
        <addr-line>Geneva</addr-line>
        <country>Switzerland</country>
      </aff>
      <aff id="aff4">
        <label>4</label>
        <institution>Risklick AG</institution>
        <addr-line>Bern</addr-line>
        <country>Switzerland</country>
      </aff>
      <aff id="aff5">
        <label>5</label>
        <institution>Clinical Trials Unit</institution>
        <addr-line>Bern</addr-line>
        <country>Switzerland</country>
      </aff>
      <aff id="aff6">
        <label>6</label>
        <institution>Institute of Pathology</institution>
        <institution>University of Bern</institution>
        <addr-line>Bern</addr-line>
        <country>Switzerland</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Douglas Teodoro <email>douglas.teodoro@unige.ch</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>9</month>
        <year>2021</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>17</day>
        <month>9</month>
        <year>2021</year>
      </pub-date>
      <volume>23</volume>
      <issue>9</issue>
      <elocation-id>e30161</elocation-id>
      <history>
        <date date-type="received">
          <day>3</day>
          <month>5</month>
          <year>2021</year>
        </date>
        <date date-type="rev-request">
          <day>1</day>
          <month>7</month>
          <year>2021</year>
        </date>
        <date date-type="rev-recd">
          <day>22</day>
          <month>7</month>
          <year>2021</year>
        </date>
        <date date-type="accepted">
          <day>5</day>
          <month>8</month>
          <year>2021</year>
        </date>
      </history>
      <copyright-statement>©Douglas Teodoro, Sohrab Ferdowsi, Nikolay Borissov, Elham Kashani, David Vicente Alvarez, Jenny Copara, Racha Gouareb, Nona Naderi, Poorya Amini. Originally published in the Journal of Medical Internet Research (https://www.jmir.org), 17.09.2021.</copyright-statement>
      <copyright-year>2021</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research, is properly cited. The complete bibliographic information, a link to the original publication on https://www.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://www.jmir.org/2021/9/e30161" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>The COVID-19 global health crisis has led to an exponential surge in published scientific literature. In an attempt to tackle the pandemic, extremely large COVID-19–related corpora are being created, sometimes with inaccurate information, which is no longer at a scale amenable to human analysis.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>In the context of searching for scientific evidence in the deluge of COVID-19–related literature, we present an information retrieval methodology for effective identification of relevant sources to answer biomedical queries posed using natural language.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>Our multistage retrieval methodology combines probabilistic weighting models and reranking algorithms based on deep neural architectures to boost the ranking of relevant documents. Similarity of COVID-19 queries is compared to documents, and a series of postprocessing methods is applied to the initial ranking list to improve the match between the query and the biomedical information source and boost the position of relevant documents.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>The methodology was evaluated in the context of the TREC-COVID challenge, achieving competitive results with the top-ranking teams participating in the competition. Particularly, the combination of bag-of-words and deep neural language models significantly outperformed an Okapi Best Match 25–based baseline, retrieving, on average, 83% of relevant documents in the top 20.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>These results indicate that multistage retrieval supported by deep learning could enhance identification of literature for COVID-19–related questions posed using natural language.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>information retrieval</kwd>
        <kwd>multistage retrieval</kwd>
        <kwd>neural search</kwd>
        <kwd>deep learning</kwd>
        <kwd>COVID-19</kwd>
        <kwd>coronavirus</kwd>
        <kwd>infodemic</kwd>
        <kwd>infodemiology</kwd>
        <kwd>literature</kwd>
        <kwd>online information</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <sec>
        <title>Background</title>
        <p>In parallel with its public health crisis with vast social and economic impacts, the COVID-19 pandemic has resulted in an explosive surge of activities within scientific communities and across many disciplines [<xref ref-type="bibr" rid="ref1">1</xref>]. In turn, it has led to an overabundance of information online and offline — a phenomenon described as an infodemic [<xref ref-type="bibr" rid="ref2">2</xref>-<xref ref-type="bibr" rid="ref4">4</xref>] — with often negative impacts on the population [<xref ref-type="bibr" rid="ref5">5</xref>]. Since early 2020 when the pandemic was officially announced, the number of publications related to COVID-19 has had exponential growth [<xref ref-type="bibr" rid="ref6">6</xref>]. In addition to the volume and velocity of the generated data, the heterogeneity as a result of the typical variety of concept naming found in the biomedical field, spelling mistakes, and the different source types [<xref ref-type="bibr" rid="ref7">7</xref>] make searching and discovery of relevant literature within the COVID-19 corpora an important challenge [<xref ref-type="bibr" rid="ref2">2</xref>].</p>
        <p>With the sheer quantity of COVID-19 information continuously produced, researchers, policy makers, journalists, and ordinary citizens, among others, are unable to keep up with the fast-evolving body of knowledge disseminated. As knowledge about the pandemic evolves, study results and conclusions may be improved, contradicted, or even proven wrong [<xref ref-type="bibr" rid="ref3">3</xref>]. Combined with relentless media coverage and social media interactions, this fast-changing and massive amount of information leads to confusion and desensitization among audiences (eg, as in the case of school opening guidelines and mask-wearing and social distancing recommendations) [<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref8">8</xref>]. They also fuel deliberate attempts to create information disorders, such as misinformation, disinformation, malinformation, and fake news [<xref ref-type="bibr" rid="ref9">9</xref>], reducing the effectiveness of public health measures and endangering countries’ ability to stop the pandemic, ultimately having a negative impact on lives [<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref11">11</xref>].</p>
        <p>To support states and relevant actors of society to manage the COVID-19 infodemic, the World Health Organization (WHO) has published a framework containing 50 recommendations, of which more than 20% are related to strengthening the scanning, review, and verification of evidence and information [<xref ref-type="bibr" rid="ref2">2</xref>]. To help actors involved with the pandemic find the most relevant information for their needs, effective information retrieval models for the COVID-19–related corpora thus became a prominent necessity [<xref ref-type="bibr" rid="ref12">12</xref>]. The information retrieval community, in turn, has responded actively and quickly to this extraordinary situation and has been aiming to address these challenges. To foster research for the scientific communities involved with the pandemic, the COVID-19 Open Research Dataset (CORD-19) [<xref ref-type="bibr" rid="ref13">13</xref>] collection was built to maintain all the related publications for the family of coronaviruses. This dataset helped research in various directions, and several tasks are built around it, including natural language processing (NLP)–related tasks, like question answering [<xref ref-type="bibr" rid="ref14">14</xref>] and language model pretraining [<xref ref-type="bibr" rid="ref15">15</xref>], and information retrieval challenges in Kaggle [<xref ref-type="bibr" rid="ref16">16</xref>] as well as the TREC-COVID [<xref ref-type="bibr" rid="ref17">17</xref>,<xref ref-type="bibr" rid="ref18">18</xref>].</p>
        <p>The TREC-COVID [<xref ref-type="bibr" rid="ref18">18</xref>-<xref ref-type="bibr" rid="ref20">20</xref>] challenge ran in 5 rounds, each asking for an incremental set of information needs to be retrieved from publications of the CORD-19 collection. In a TREC-COVID round, participants were asked to rank documents of the CORD-19 corpus in decreasing order of likelihood of containing answers to a set of query topics. At the end of the round, experts provided relevance judgments for the top-ranking documents submitted by different participants using a pooling strategy [<xref ref-type="bibr" rid="ref21">21</xref>]. Although limited to the first several top submissions of the participating teams, these relevance judgments enable the evaluation of the different models and are valuable examples to train retrieval models for the subsequent rounds of the challenge.</p>
        <p>To improve search and discovery of COVID-19 scientific literature, in this work we aimed to investigate an information retrieval model supported by deep language models to enhance findability of relevant documents in fast-evolving corpora.</p>
        <p>More than 50 teams participated in the TREC-COVID challenge worldwide, developing new information retrieval and NLP methodologies to tackle this complex task [<xref ref-type="bibr" rid="ref22">22</xref>-<xref ref-type="bibr" rid="ref27">27</xref>]. Having participated in the TREC-COVID challenge, in this paper we detail our retrieval methodology, which brought us competitive results with the top-ranking teams. Particularly, we used a multistage retrieval pipeline, combining classic probabilistic weighting models with novel learning to rank approaches made by ensemble of deep masked language models. We present our results and analyze how the different components of the pipeline contribute to providing the best answers to the query topics.</p>
      </sec>
      <sec>
        <title>Related Work</title>
        <sec>
          <title>Two-Stage Information Retrieval</title>
          <p>Currently, 2 main methodologies are used to rank documents in information retrieval systems: (1) the classic query-document probabilistic approaches, such as Okapi Best Match 25 (BM25) [<xref ref-type="bibr" rid="ref28">28</xref>] and probabilistic language models [<xref ref-type="bibr" rid="ref29">29</xref>], and (2) the learning-to-rank approaches, which usually postprocess results provided by classic systems to improve the original ranked list [<xref ref-type="bibr" rid="ref30">30</xref>,<xref ref-type="bibr" rid="ref31">31</xref>]. When there are sufficient training data (ie, queries with relevance judgments for the case of information retrieval), learning-to-rank models often outperform classic one-stage retrieval systems [<xref ref-type="bibr" rid="ref30">30</xref>,<xref ref-type="bibr" rid="ref32">32</xref>]. Nevertheless, empiric results have also shown that the reranking step may degrade the performance of the original rank [<xref ref-type="bibr" rid="ref33">33</xref>]. Progress on learning-to-rank algorithms has been fostered thanks to the public release of annotated benchmark datasets, such as the LETOR [<xref ref-type="bibr" rid="ref34">34</xref>] and Microsoft Machine Reading Comprehension (MS MARCO) [<xref ref-type="bibr" rid="ref35">35</xref>].</p>
          <p>Learning-to-rank approaches can be categorized into 3 main classes of algorithms — pointwise, pairwise, and listwise — based on whether they consider 1 document, a pair of documents, or the whole ranking list in the learning loss function, respectively [<xref ref-type="bibr" rid="ref30">30</xref>-<xref ref-type="bibr" rid="ref32">32</xref>,<xref ref-type="bibr" rid="ref36">36</xref>]. Variations of these learning-to-rank algorithms are available based on neural networks [<xref ref-type="bibr" rid="ref31">31</xref>,<xref ref-type="bibr" rid="ref36">36</xref>] and other learning algorithms, such as boosting trees [<xref ref-type="bibr" rid="ref37">37</xref>]. More recently, pointwise methods leveraging the power of neural-based masked language models have attracted great attention [<xref ref-type="bibr" rid="ref38">38</xref>,<xref ref-type="bibr" rid="ref39">39</xref>]. These learning-to-rank models use the query and document learning representations provided by the masked language model to classify whether a document in the ranked list is relevant to the query. While these two-stage retrieval methods based on neural rerankers provide interesting features, such as learned word proximity, in practice, the first stage based on classic probabilistic retrieval algorithms is indispensable, as the algorithmic complexity of the reranking methods makes them often prohibitive to classify the whole collection [<xref ref-type="bibr" rid="ref32">32</xref>].</p>
          <p>Recent advances in text analytics, including question answering, text classification, and information retrieval, have indeed mostly been driven by neural-based masked language models. A seminal effort in this direction is the Bidirectional Encoder Representations from Transformers (BERT) model [<xref ref-type="bibr" rid="ref38">38</xref>], which shows significant success in a wide range of NLP tasks. BERT uses a bidirectional learning approach based on the transformer architecture [<xref ref-type="bibr" rid="ref40">40</xref>] and is trained to predict masked words in context. Since the introduction of BERT, several works tried to augment its performance. A successful work in this direction is the robustly optimized BERT approach (RoBERTa) [<xref ref-type="bibr" rid="ref41">41</xref>], using larger and more diverse corpora for training as well as a different tokenizer. While RoBERTa needs larger computing power, it often improves the performance of BERT across different downstream tasks. Another similar effort is the XLNet model [<xref ref-type="bibr" rid="ref42">42</xref>], which uses a permutation-based masking, showing also consistent improvement over BERT.</p>
        </sec>
        <sec>
          <title>TREC-COVID Retrieval Efforts</title>
          <p>Recently, the specific case of retrieval of COVID-related scientific publications has been addressed in several efforts [<xref ref-type="bibr" rid="ref22">22</xref>-<xref ref-type="bibr" rid="ref27">27</xref>]. These works follow mostly the aforementioned two-stage retrieval process. Among the first efforts is the SLEDGE system [<xref ref-type="bibr" rid="ref22">22</xref>], where the authors detailed their solutions for the first round of the TREC-COVID challenge using a BM25-based ranking method followed by a neural reranker. An important difficulty for the first round of the challenge is the absence of labelled data. To overcome this limitation, the authors lightly tuned the hyperparameters of the first-stage ranking model using minimal human judgments on a subset of the topics. As for the second stage, they used the SciBERT model [<xref ref-type="bibr" rid="ref43">43</xref>], which is pretrained on biomedical texts, and fine-tuned on the general MS MARCO set [<xref ref-type="bibr" rid="ref35">35</xref>] with a simple cross-entropy loss. CO-Search [<xref ref-type="bibr" rid="ref24">24</xref>] uses a slightly different approach, wherein they incorporated semantic information, as captured by Sentence-BERT [<xref ref-type="bibr" rid="ref44">44</xref>], also within the initial retrieval stage. Moreover, they used the citation information of publications in their ranking pipeline. In the work of Covidex [<xref ref-type="bibr" rid="ref23">23</xref>], the authors provided a full-stack search engine implementing a multistage ranking pipeline, where their first stage is based on the Anserini information retrieval toolkit [<xref ref-type="bibr" rid="ref45">45</xref>], complemented by different neural reranking strategies. They addressed the issue of length variability among documents with an atomic document representation using, for example, paragraph-level indexing.</p>
        </sec>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <p>In this section, we describe the corpus and query set and our methodology for searching COVID-19–related literature in the context of the TREC-COVID challenge. We start by introducing the CORD-19 dataset, which is the corpus used in the competition. We then describe the challenge organization and assessment queries. Then, we detail our searching methodology, based on a multistage retrieval approach. Finally, we present the evaluation criteria used to score the participants’ submissions. For further details on the TREC-COVID challenge, see [<xref ref-type="bibr" rid="ref19">19</xref>,<xref ref-type="bibr" rid="ref20">20</xref>].</p>
      <sec>
        <title>The CORD-19 Dataset</title>
        <p>A prominent effort to gather publications, preprints, and reports related to the coronaviruses and acute respiratory syndromes (COVID-19, Middle East respiratory syndrome [MERS], and severe acute respiratory syndrome [SARS]) is the CORD-19 collection of the Allen Institute for Artificial Intelligence (in collaboration with other partners) [<xref ref-type="bibr" rid="ref13">13</xref>]. <xref rid="figure1" ref-type="fig">Figure 1</xref> describes the size and content origin of the corpus for the different TREC-COVID rounds. As we can see, this is a large and dynamically growing semistructured dataset from various sources like PubMed, PubMed Central (PMC), WHO, and preprint servers like bioRxiv, medRxiv, and arXiv. The dataset contains document metadata, including title, abstract, and authors, among others, but also the full text or link to full-text files when available. A diverse set of related disciplines (eg, from virology and immunology to genetics) are represented in the collection. Throughout the challenge, the dataset was updated daily, and snapshot versions representing its status at a certain time were provided to the participants for each round. In the last round of the TREC-COVID challenge, the corpus contained around 200,000 documents, coming mostly from Medline, PMC, and WHO sources.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Evolution of the CORD-19 corpus across the TREC-COVID rounds stratified by source. PMC: PubMed Central; WHO: World Health Organization.</p>
          </caption>
          <graphic xlink:href="jmir_v23i9e30161_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>The TREC-COVID Challenge</title>
        <p>To assess the different information retrieval models, the TREC-COVID challenge provided a query set capturing relevant search questions of researchers during the pandemic. These needs are stated in query topics, consisting of 3 free-text fields — query, question, and narrative — with an increasing level of context, as shown in the examples in <xref ref-type="table" rid="table1">Table 1</xref>. The challenge started with 30 topics in round 1 and added 5 new topics in each new round, thus reaching 50 topics in round 5.</p>
        <p>In each round, the participants provided ranked lists of candidate publications of the CORD-19 collection that best answered the query topics. Each list was generated by a different information retrieval model, a so-called <italic>run</italic>, with up to 5 runs in the first 4 rounds and 7 runs in the last round per team. At the end of the round, domain experts examined the top k candidate publications (where k is defined by the organizers) from the priority runs of the teams and judged them as “highly relevant,” “somehow relevant,” or “irrelevant.” Then, based on the consolidated relevance judgments, the participants were evaluated using standard information retrieval metrics (eg, normalized discounted cumulative gain [NDCG], precision). Judged documents for a specific topic from previous rounds were excluded from the relevance judgment list.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Examples of TREC-COVID topics with the fields query, question, and narrative.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="80"/>
            <col width="150"/>
            <col width="300"/>
            <col width="470"/>
            <thead>
              <tr valign="top">
                <td>Topic</td>
                <td>Query</td>
                <td>Question</td>
                <td>Narrative</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>1</td>
                <td>Coronavirus origin</td>
                <td>What is the origin of COVID-19?</td>
                <td>Seeking a range of information about the SARS-CoV-2 virus’s origin, including its evolution, animal source, and first transmission into humans</td>
              </tr>
              <tr valign="top">
                <td>25</td>
                <td>Coronavirus biomarkers</td>
                <td>Which biomarkers predict the severe clinical course of 2019-nCOV infection?</td>
                <td>Looking for information on biomarkers that predict disease outcomes in people infected with coronavirus, specifically those that predict severe and fatal outcomes</td>
              </tr>
              <tr valign="top">
                <td>50</td>
                <td>mRNA vaccine coronavirus</td>
                <td>What is known about an mRNA vaccine for the SARS-CoV-2 virus?</td>
                <td>Looking for studies specifically focusing on mRNA vaccines for COVID-19, including how mRNA vaccines work, why they are promising, and any results from actual clinical studies</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
      <sec>
        <title>Proposed Multistage Retrieval Methodology</title>
        <p><xref rid="figure2" ref-type="fig">Figure 2</xref> shows the different components of our information retrieval pipeline for the COVID-related literature. These components can be divided into 3 main categories: (1) first-stage retrieval using classic probabilistic methods, (2) second-stage (neural) reranking models, and (3) rank fusion algorithms. Given a corpus containing metadata information, such as title and abstract, and full text, when available, documents are stored using directed and inverted indexes. Then, transformer-based and classic learning-to-rank models trained using relevance judgments are used to classify and rerank pairs of query-document answers. The ranked list obtained from the different models are further combined using the reciprocal rank fusion (RRF) algorithm.</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Multistage retrieval pipeline, where light green indicates first-stage retrieval, light and dark blue indicate second-stage retrieval, and M1-M7 denote the different models used to create the respective runs 1-7 in round 5. BERT: Bidirectional Encoder Representations from Transformers; BM25: Okapi Best Match 25; DFR: divergence from randomness; L-MART: LambdaMART model; LMD: language model Dirichlet; Logistic: logistic regression model; RoBERTa: robustly optimized BERT approach; RRF: reciprocal rank fusion.</p>
          </caption>
          <graphic xlink:href="jmir_v23i9e30161_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <sec>
          <title>First-Stage Retrieval</title>
          <p>For the first-stage retrieval, we assessed 3 variations of the classic query-document probabilistic weighting models: BM25 [<xref ref-type="bibr" rid="ref46">46</xref>], divergence from randomness (DFR) [<xref ref-type="bibr" rid="ref47">47</xref>], and language model Dirichlet (LMD) [<xref ref-type="bibr" rid="ref48">48</xref>].</p>
          <p>Our first classical model, Okapi BM25 [<xref ref-type="bibr" rid="ref28">28</xref>], is based on the popular term frequency-inverse document frequency (tf-idf) framework. In the tf-idf framework, term-weights are calculated using the product of the within-document term frequency <italic>tf</italic> and the inverse document frequency <italic>idf</italic> statistics. Denoting <italic>f(t,d)</italic> as the number of times a term <italic>t</italic> appears in a document <italic>d</italic> within a collection <italic>D</italic>, BM25 calculates the term-weight <italic>w</italic> as:</p>
          <p>
            <disp-formula>
              <graphic xlink:href="jmir_v23i9e30161_fig8.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
            </disp-formula>
          </p>
          <p>where &#124;<italic>d</italic>&#124; is the length of the document, &#124;<italic>D</italic>&#124; is the size of the collection, <italic>avg<sub>l</sub></italic> is the average length of the documents in the collection, <italic>n<sub>t</sub></italic> is the number of documents containing the term <italic>t</italic>, and <italic>k</italic><sub>1</sub> and <italic>b</italic> are parameters of the model associated with the term frequency and the document size normalization, respectively.</p>
          <p>The second model, DFR, extends the basic tf-idf concept by considering that the more the divergence of the term frequency <italic>tf</italic> from its collection frequency <italic>cf</italic> (<italic>cf ≈ df</italic>), the more the information carried by the term in the document [<xref ref-type="bibr" rid="ref47">47</xref>]. Thus, for a given model of randomness <italic>M</italic>, in the DFR framework, the term-weight is inversely proportional to the probability of term-frequency within the document obtained by <italic>M</italic> for the collection <italic>D</italic>:</p>
          <p>
            <disp-formula>w(t,d,D) = −k·log p<sub>M</sub>(t ∈ d&#124;D),</disp-formula>
          </p>
          <p>where <italic>p<sub>M</sub></italic> is a probabilistic model, such as binomial or geometric distributions, and <italic>k</italic> is a parameter of the probabilistic model.</p>
          <p>The third model, LMD, uses a language model that assigns probabilities to word sequences with a Dirichlet-prior smoothing to measure the similarity between a query and a document [<xref ref-type="bibr" rid="ref48">48</xref>]. In a retrieval context, a language model specifies the probability that a document is generated by a query, and smoothing is used to avoid zero probabilities to unseen words and improves the overall word probability accuracy. In the LMD algorithm, term-weight is calculated using the following equation:</p>
          <p>
            <disp-formula>
              <graphic xlink:href="jmir_v23i9e30161_fig9.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
            </disp-formula>
          </p>
          <p>where <italic>p</italic>(<italic>t</italic>&#124;<italic>d</italic>) denotes the probability of a term in a document, <italic>p</italic>(<italic>t</italic>&#124;<italic>D</italic>) is the probability of a term in the collection, and <italic>μ</italic> is the Dirichlet parameter to control the amount of smoothing.</p>
          <p>In our pipeline, the BM25, DFR, and LMD implementations are based on the Elasticsearch framework. The model parameters were trained using the queries and relevance judgments of round 4 in a 5-fold cross-validation setup.</p>
        </sec>
        <sec>
          <title>Second-Stage Reranking</title>
          <p>The models used in the first-stage ranking were based on the bag-of-words statistics, where essentially we looked at the histogram of query terms and their document and collection statistics but neglected the sequential nature of text and word relations. To mitigate these limitations and improve the initial rankings, after the first-stage retrieval, we used neural masked language models trained on the relevance judgments from previous rounds so that syntactic and semantic relations can be better captured [<xref ref-type="bibr" rid="ref49">49</xref>,<xref ref-type="bibr" rid="ref50">50</xref>]. As shown in <xref rid="figure2" ref-type="fig">Figure 2</xref>, we assessed 3 masked language models based on the transformer architecture: BERT, RoBERTa, and XLNet.</p>
          <p><xref rid="figure3" ref-type="fig">Figure 3</xref> shows the general idea of how we used the BERT language model to match documents to a query topic. Given a topic and a document associated with it as input and a relevance judgment as the label for the query-document association (relevant or not), the model was trained or fine-tuned in the BERTology parlance, as it had been previously pretrained on a large corpus, to predict whether the document is relevant to the query. In the input layer of the pretrained model, the topic and candidate publication were tokenized and separated by the language model <italic>[SEP]</italic> token (stands for sentence separation). Moreover, to enforce the sequential structure of text, positional embedding as well as sentence embeddings were added to the main embeddings for each token. These embeddings were then fed to the transformer layers of BERT, which were updated during the fine-tuning step. Finally, the output of the special <italic>[CLS]</italic> token (stands for classification) was used to determine the relevance of the candidate publication to the queried information topic.</p>
          <fig id="figure3" position="float">
            <label>Figure 3</label>
            <caption>
              <p>Neural masked language model for document relevance classification. As inputs to the pre-trained masked language model, the topics and candidate publications are separated by the [SEP] tag. Inputs are tokenized using subword tokenization methods (grey boxes); segment embeddings (yellow boxes) represent the difference between a topic and a document input; position embeddings (green boxes) enforce the sequential structure of text; the transformer and classification layers are updated in the training phase using the relevance judgments; and the output of the special [CLS] token is finally used to determine the relevance of the candidate publication to the queried information topic.</p>
            </caption>
            <graphic xlink:href="jmir_v23i9e30161_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
          <p>Using the query topics from a preceding round (round 4 for the results presented here) and their respective list of relevance judgments, we fine-tuned the BERT model to rescore the initial association of the query-document pair between 0 (not relevant) and 1 (very relevant). For this, we used the score associated with the <italic>[CLS]</italic> token position. We limited the input size of the query and document to 512 tokens (or subwords). Then, at the second-stage re-ranking step, we classified the top <italic>k</italic> publications retrieved by the first-stage models using the fine-tuned BERT model (we set <italic>k</italic>=5000 in our experiments).</p>
          <p>Identical training strategies were used for the RoBERTa and XLNet language models. The main difference for the RoBERTa model is that it was originally pretrained on a corpus an order of magnitude bigger than that of BERT (160 GB vs 16 GB). Moreover, it uses dynamic masking during the training process, that is, at each training epoch, the model sees different versions of the same sentence with masks on different positions, compared to a static mask algorithm for BERT. Last, RoBERTa uses a byte-level Byte-Pair-Encoding tokenizer compared to BERT’s WordPiece. As BERT and its variants (eg, RoBERTa) neglect the dependency between the masked positions and suffer from a pretrain-finetune discrepancy, XLNet adopts a permutation language model instead of a masked language model to solve the discrepancy problem. For downstream tasks, the fine-tuning procedure of XLNet is similar to that of BERT and RoBERTa.</p>
          <p>We used the BERT (base - 12 layers), RoBERTa, and XLNet model implementations available from the Hugging Face framework. The models were trained using the Adam optimizer [<xref ref-type="bibr" rid="ref51">51</xref>] with an initial learning rate of 1.5e–5, weight decay of 0.01, and early stopping with a patience of 5 epochs.</p>
        </sec>
        <sec>
          <title>Combining Model Results</title>
          <p>We used the RRF algorithm [<xref ref-type="bibr" rid="ref52">52</xref>] to combine the results of different retrieval runs. RRF is a simple, yet powerful technique to rescore a retrieval list based on the scores of multiple retrieval lists. Given a set of documents <italic>D</italic> to be sorted and a set of ranking files <italic>R</italic> = {<italic>r</italic><sub>1</sub>…<italic>r</italic><sub>n</sub>}, each with a permutation on 1…&#124;<italic>D</italic>&#124;, RRF computes the aggregated score using the following equation:</p>
          <p>
            <disp-formula>
              <graphic xlink:href="jmir_v23i9e30161_fig10.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
            </disp-formula>
          </p>
          <p>where <italic>r</italic>(<italic>q,d</italic>) is the rank of document <italic>d</italic> for the query <italic>q</italic> in the ranking file <italic>r<sub>i</sub></italic> and <italic>k</italic> is a threshold parameter, which was tuned to <italic>k</italic>=60 using data from previous rounds.</p>
        </sec>
        <sec>
          <title>Second-Step Learning-to-Rank</title>
          <p>To exploit the features (relevance score) created by the different bag-of-words and masked language models, we added a second-step learning-to-rank pass to our pipeline. Using the similarity scores <italic>s</italic> computed by the BM25, DFR, LMD, BERT, RoBERTa, and XLNet as input features and the relevance judgments of previous rounds as labels, we trained 2 learning-to-rank models: LambdaMART and a logistic regression classifier. While the language models exploit the sequential nature of text, they completely neglect the ranking provided by the bag-of-words models. Thus, we investigated the use of the LambdaMART [<xref ref-type="bibr" rid="ref31">31</xref>] algorithm, which uses a pairwise loss that compares pairs of documents and tells which document is better in the given pair. Moreover, we trained a simple pointwise logistic regression to consider the similarity measures computed by the first- and second-stage retrieval models. We used the pyltr and scikit-learn implementations for the LambdaMART and logistic regression, respectively. For the LambdaMART model, we trained the learning rate and the number of estimators, and for the logistic regression model, we trained the solver and regularization strength parameters.</p>
        </sec>
        <sec>
          <title>First-Stage Retrieval: Preprocessing, Querying Strategies, and Parameter Tuning</title>
          <p>In the first-stage retrieval step, we applied a classical NLP preprocessing pipeline to the publications (indexing phase) and topics (search phase): lower-casing, removal of nonalphanumerical characters (apart from “-”), and Porter stemming. Additionally, a minimal set of COVID-related synonyms, such as “covid-19” and “sars-cov-2,” were created and used for query expansion.</p>
          <p>The queries were then submitted to the index in a combinatorial way using the different topic fields and document sections. This means that, for each <italic>query</italic>, <italic>question</italic>, and <italic>narrative</italic> field of a topic, we submitted a query against the index for each of the <italic>title</italic> and <italic>abstract</italic> sections of the publications (abstract + full text in case of the full-text index). Additionally, the whole topic (query + question + narrative) was queried against the whole document. This querying strategy led to 7 queries for each topic, and the final score was computed by summing up the individual scores. Moreover, as the first public announcement of a coronavirus-related pneumonia was made in January 2020, we filtered out all publications before December 2019.</p>
          <p>We defined the best query strategy and fine-tuned the basic parameters of the bag-of-words models using the relevance judgments of the previous round in a 5-fold cross-validation approach. As an example, to tune the <italic>b</italic> and <italic>k</italic> parameters of the BM25 model in round 5, we took the topics and relevance judgment from round 4 and submitted them to the index in round 5, optimizing the P@10 metric. For round 1, we used default parameter values.</p>
        </sec>
      </sec>
      <sec>
        <title>Evaluation Criteria</title>
        <p>We used the official metrics of the TREC-COVID challenge to report our results: precision at K documents (P@K), NDCG at K documents (NDCG@K), mean average precision (MAP), and binary preference (Bpref) [<xref ref-type="bibr" rid="ref19">19</xref>]. For all these metrics, the closer the result is to 1, the better the retrieval model. They were obtained using the <italic>trec_eval</italic> information retrieval evaluation toolkit.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <p>We used 7 models from our pipeline to create the 7 runs submitted for the official evaluation of the TREC-COVID challenge (labels M1 to M7 in <xref rid="figure2" ref-type="fig">Figure 2</xref>). Our first model — <italic>bm25</italic> — based on the BM25 weighting model against the metadata index provided the baseline run. Our second model — <italic>bow + rrf</italic> — was a fusion of the BM25, DFR, and LMD weighting models computed against the metadata and full-text indices and combined using the RRF algorithm. Model 3 — <italic>mlm + rrf</italic> — used the RRF combination of the BERT, RoBERTa, and XLNet models applied to the top 5000 documents retrieved by model 2. Model 4 — <italic>bow + mlm + rrf</italic> — combined the results of models 2 and 3 using the RRF algorithm. Then, model 5 — <italic>bow + mlm + lm</italic> — reranked the results of runs 2 and 3 using the LambdaMART algorithm trained using the similarity scores of the individual models 2 and 3. Similarly, model 6 — <italic>bow + mlm + lr</italic> — was based on a logistic regression classifier that uses as features the similarity scores of runs 2 and 3 to classify the relevance of the query-document pairs. Finally, model 7 — <italic>bow + mlm + lr + rrf</italic> — combined runs 2, 3, and 6 using the RRF algorithm. For all RRF combinations, the parameter <italic>k</italic> was set to 60. All models and parameters were trained using round 4 relevance judgments. <xref ref-type="table" rid="table2">Table 2</xref> summarizes the submitted runs.</p>
      <table-wrap position="float" id="table2">
        <label>Table 2</label>
        <caption>
          <p>Summary of the submitted runs. Refer to <xref rid="figure2" ref-type="fig">Figure 2</xref> for a pictorial description.</p>
        </caption>
        <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
          <col width="100"/>
          <col width="150"/>
          <col width="750"/>
          <thead>
            <tr valign="top">
              <td>Run</td>
              <td>Name</td>
              <td>Description</td>
            </tr>
          </thead>
          <tbody>
            <tr valign="top">
              <td>1</td>
              <td>bm25</td>
              <td>Run based on the baseline BM25<sup>a</sup> model using the metadata index</td>
            </tr>
            <tr valign="top">
              <td>2</td>
              <td>bow + rrf</td>
              <td>An RRF<sup>b</sup> combination of BM25, DFR<sup>c</sup>, and LMD<sup>d</sup> models computed against the metadata and full-text indices</td>
            </tr>
            <tr valign="top">
              <td>3</td>
              <td>mlm + rrf</td>
              <td>An RRF combination of BERT, RoBERTa<sup>e</sup>, and XLNet models applied to run 2</td>
            </tr>
            <tr valign="top">
              <td>4</td>
              <td>bow + mlm + rrf</td>
              <td>An RRF combination of runs 2 and 3</td>
            </tr>
            <tr valign="top">
              <td>5</td>
              <td>bow + mlm + lm</td>
              <td>A LambdaMART-based model using features from the individual models used to create runs 2 and 3</td>
            </tr>
            <tr valign="top">
              <td>6</td>
              <td>bow + mlm + lr</td>
              <td>A logistic regression model using features from the individual models used to create runs 2 and 3</td>
            </tr>
            <tr valign="top">
              <td>7</td>
              <td>bow + mlm + lr + rrf</td>
              <td> An RRF combination of runs 2, 3, and 6</td>
            </tr>
          </tbody>
        </table>
        <table-wrap-foot>
          <fn id="table2fn1">
            <p><sup>a</sup>BM25: Okapi Best Match 25.</p>
          </fn>
          <fn id="table2fn2">
            <p><sup>b</sup>RRF: reciprocal rank fusion.</p>
          </fn>
          <fn id="table2fn3">
            <p><sup>c</sup>DFR: divergence from randomness.</p>
          </fn>
          <fn id="table2fn4">
            <p><sup>d</sup>LMD: language model Dirichlet.</p>
          </fn>
          <fn id="table2fn5">
            <p><sup>e</sup>RoBERTa: robustly optimized BERT approach.</p>
          </fn>
        </table-wrap-foot>
      </table-wrap>
      <sec>
        <title>Official Evaluation Results</title>
        <p><xref ref-type="table" rid="table3">Table 3</xref> shows the official results of the TREC-COVID challenge for the 7 submitted runs. As we can see, the best results are provided by model 7 (bow + mlm + lr + rrf), apart from the metric Bpref, which is the highest for model 5 (bow + mlm + lm). Comparing the NDCG@20 metric, model 7 improved by 16.4 percentage points over the baseline model (26.0% relative improvement). On average, almost 17 of the top 20 documents retrieved by model 7 were pertinent to the query. Model 3 was able to retrieve 6.6% more relevant documents compared to the baseline model (6963 vs 6533 of a total of 10,910 documents judged relevant for the 50 queries). At the same time, it showed a relative improvement in precision of 22.1% for the top 20 documents. Therefore, it not only improved the recall but also brought relevant documents higher in the ranking list. These results show that the use of the masked language models had a significant positive impact on the ranking.</p>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Performance of our models in round 5 of the TREC-COVID challenge.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="400"/>
            <col width="120"/>
            <col width="120"/>
            <col width="120"/>
            <col width="120"/>
            <col width="120"/>
            <thead>
              <tr valign="bottom">
                <td>Model</td>
                <td>NDCG@20<sup>a</sup></td>
                <td>P@20<sup>b</sup></td>
                <td>Bpref<sup>c</sup></td>
                <td>MAP<sup>d</sup></td>
                <td># rel<sup>e</sup></td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>bm25</td>
                <td>0.6320</td>
                <td>0.6440</td>
                <td>0.5021</td>
                <td>0.2707</td>
                <td>6533</td>
              </tr>
              <tr valign="top">
                <td>bow + rrf</td>
                <td>0.6475</td>
                <td>0.6650</td>
                <td>0.5174</td>
                <td>0.2778</td>
                <td>6695</td>
              </tr>
              <tr valign="top">
                <td>mlm + rrf</td>
                <td>0.7716</td>
                <td>0.7880</td>
                <td>0.5680</td>
                <td>0.3468</td>
                <td>6963</td>
              </tr>
              <tr valign="top">
                <td>bow + mlm + rrf</td>
                <td>0.7826</td>
                <td>0.8050</td>
                <td>0.5616</td>
                <td>0.3719</td>
                <td>7006</td>
              </tr>
              <tr valign="top">
                <td>bow + mlm + lm</td>
                <td>0.7297</td>
                <td>0.7460</td>
                <td>0.5759</td>
                <td>0.3068</td>
                <td>6834</td>
              </tr>
              <tr valign="top">
                <td>bow + mlm + lr</td>
                <td>0.7375</td>
                <td>0.7450</td>
                <td>0.5719</td>
                <td>0.3439</td>
                <td>6976</td>
              </tr>
              <tr valign="top">
                <td>bow + mlm + lr + rrf</td>
                <td>0.7961</td>
                <td>0.8260</td>
                <td>0.5659</td>
                <td>0.3789</td>
                <td>6939</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table3fn1">
              <p><sup>a</sup>NDCG@20: normalized discounted cumulative gain at 20 documents.</p>
            </fn>
            <fn id="table3fn2">
              <p><sup>b</sup>P@20: precision at 20 documents.</p>
            </fn>
            <fn id="table3fn3">
              <p><sup>c</sup>Bpref: binary preference.</p>
            </fn>
            <fn id="table3fn4">
              <p><sup>d</sup>MAP: mean average precision.</p>
            </fn>
            <fn id="table3fn5">
              <p><sup>e</sup># rel: total number of relevant documents retrieved by the model for the 50 queries.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <p><xref ref-type="table" rid="table4">Table 4</xref> shows the official best results for the different metrics for the top 10 teams participating in round 5 of TREC-COVID (NDCG@20 metric taken as reference). Comparing the NDCG@20 metric, the best model submitted by our team (risklick) was ranked 4th of the 28 teams participating in round 5, 5.4 percentage points below the top-performing team (unique_ptr). For reference, the best-performing model in the challenge retrieves on average 17.5 relevant documents per query in the top 20 retrieved documents compared to 16.5 for our model. If we consider a reference baseline made by the median of the participating teams’ best values, our pipeline outperforms the baseline by 11.7%, 14.6%, 16.7%, and 25.0% for the MAP, P@20, NDCG@20, and Bpref metrics, respectively. All data and results of the TREC-COVID challenge can be found in [<xref ref-type="bibr" rid="ref17">17</xref>].</p>
        <table-wrap position="float" id="table4">
          <label>Table 4</label>
          <caption>
            <p>Official leaderboard of the top 10 teams in the final round of the TREC-COVID challenge.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="200"/>
            <col width="200"/>
            <col width="200"/>
            <col width="200"/>
            <col width="200"/>
            <thead>
              <tr valign="bottom">
                <td>Team</td>
                <td>NDCG@20<sup>a</sup></td>
                <td>P@20<sup>b</sup></td>
                <td>Bpref<sup>c</sup></td>
                <td>MAP<sup>d</sup></td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>unique_ptr</td>
                <td>0.8496</td>
                <td>0.8760</td>
                <td>0.6378</td>
                <td>0.4731</td>
              </tr>
              <tr valign="top">
                <td>covidex</td>
                <td>0.8311</td>
                <td>0.8460</td>
                <td>0.5330</td>
                <td>0.3922</td>
              </tr>
              <tr valign="top">
                <td>Elhuyar_NLP_team</td>
                <td>0.8100</td>
                <td>0.8340</td>
                <td>0.6284</td>
                <td>0.4169</td>
              </tr>
              <tr valign="top">
                <td>risklick (ours)</td>
                <td>0.7961</td>
                <td>0.8260</td>
                <td>0.5759</td>
                <td>0.3789</td>
              </tr>
              <tr valign="top">
                <td>udel_fang</td>
                <td>0.7930</td>
                <td>0.8270</td>
                <td>0.5555</td>
                <td>0.3682</td>
              </tr>
              <tr valign="top">
                <td>CIR</td>
                <td>0.7921</td>
                <td>0.8320</td>
                <td>0.5735</td>
                <td>0.3983</td>
              </tr>
              <tr valign="top">
                <td>uogTr</td>
                <td>0.7921</td>
                <td>0.8420</td>
                <td>0.5709</td>
                <td>0.3901</td>
              </tr>
              <tr valign="top">
                <td>UCD_CS</td>
                <td>0.7859</td>
                <td>0.8440</td>
                <td>0.4488</td>
                <td>0.3348</td>
              </tr>
              <tr valign="top">
                <td>sabir</td>
                <td>0.7789</td>
                <td>0.8210</td>
                <td>0.6078</td>
                <td>0.4061</td>
              </tr>
              <tr valign="top">
                <td>mpiid5</td>
                <td>0.7759</td>
                <td>0.8110</td>
                <td>0.5873</td>
                <td>0.3903</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table4fn1">
              <p><sup>a</sup>NDCG@20: normalized discounted cumulative gain at 20 documents.</p>
            </fn>
            <fn id="table4fn2">
              <p><sup>b</sup>P@20: precision at 20 documents.</p>
            </fn>
            <fn id="table4fn3">
              <p><sup>c</sup>Bpref: binary preference.</p>
            </fn>
            <fn id="table4fn4">
              <p><sup>d</sup>MAP: mean average precision.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Model Performance Analyses</title>
        <p><xref rid="figure4" ref-type="fig">Figure 4</xref> shows the relative improvement of the different models in the pipeline in relation to the baseline (model 1 – bm25) according to the NDCG@20 metric. The most significant contribution to the final performance came from the inclusion of the masked language models in the pipeline — model 3: mlm + rrf and model 4: bow + mlm + rrf — adding a relative performance gain to the results of 22.1% and 23.8%, respectively. The classic learning-to-rank models — model 5 and model 6 — actually jeopardized the performance when compared to their previous model in the pipeline (model 4). However, when model 6 was combined with model 4, a 2.1 percentage point gain was achieved on top of model 4, leading to the best model (model 7: bow + mlm + lr + rrf). Indeed, it is important to notice the consistent benefit of combining models using the RRF algorithm. Interestingly, the effect of LambdaMART seemed to be significantly detrimental for P@20, NDCG@20, and MAP, but marginally beneficial for Bpref, for which it is the best model.</p>
        <fig id="figure4" position="float">
          <label>Figure 4</label>
          <caption>
            <p>Relative contribution of each model for the normalized discounted cumulative gain at document 20 (NDCG@20) metric compared to the baseline model bm25.</p>
          </caption>
          <graphic xlink:href="jmir_v23i9e30161_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>The performance of the individual masked language models is shown in <xref ref-type="table" rid="table5">Table 5</xref>. Surprisingly, they are similar to the baseline model, with small performance reductions for BERT and RoBERTa models and a small performance gain for the XLNet model. However, when combined, they provide the significant performance improvement shown in <xref rid="figure4" ref-type="fig">Figure 4</xref>. Our assumption is that they retrieve different documents as relevant and their combination using RRF ends up aligning these documents in the top rank. Indeed, looking at the top 3 documents for query 1 retrieved by these models, for example, there is no overlap between the documents, with 8 relevant and 1 unjudged (out of the 9 documents). This result clearly shows the beneficial effect of using an ensemble of masked language models, as well as the success of RRF in fusing their retrievals.</p>
        <table-wrap position="float" id="table5">
          <label>Table 5</label>
          <caption>
            <p>Performance of the individual masked language models and their combination using reciprocal rank fusion (RRF).</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="200"/>
            <col width="170"/>
            <col width="170"/>
            <col width="170"/>
            <col width="170"/>
            <col width="120"/>
            <thead>
              <tr valign="bottom">
                <td>Model</td>
                <td>NDCG@20<sup>a</sup></td>
                <td>P@20<sup>b</sup></td>
                <td>Bpref<sup>c</sup></td>
                <td>MAP<sup>d</sup></td>
                <td># rel<sup>e</sup></td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>BERT<sup>f</sup></td>
                <td>0.6209</td>
                <td>0.6430</td>
                <td>0.5588</td>
                <td>0.2897</td>
                <td>6879</td>
              </tr>
              <tr valign="top">
                <td>RoBERTa<sup>g</sup></td>
                <td>0.6261</td>
                <td>0.6440</td>
                <td>0.5530</td>
                <td>0.2946</td>
                <td>6945</td>
              </tr>
              <tr valign="top">
                <td>XLNet</td>
                <td>0.6436</td>
                <td>0.6570</td>
                <td>0.5644</td>
                <td>0.3064</td>
                <td>6926</td>
              </tr>
              <tr valign="top">
                <td>mlm + rrf</td>
                <td>0.7716</td>
                <td>0.7880</td>
                <td>0.5680</td>
                <td>0.3468</td>
                <td>6963</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table5fn1">
              <p><sup>a</sup>NDCG@20: normalized discounted cumulative gain at 20 documents.</p>
            </fn>
            <fn id="table5fn2">
              <p><sup>b</sup>P@20: precision at 20 documents.</p>
            </fn>
            <fn id="table5fn3">
              <p><sup>c</sup>Bpref: binary preference.</p>
            </fn>
            <fn id="table5fn4">
              <p><sup>d</sup>MAP: mean average precision.</p>
            </fn>
            <fn id="table5fn5">
              <p><sup>e</sup># rel: total number of relevant documents retrieved by the model for the 50 queries.</p>
            </fn>
            <fn id="table5fn6">
              <p><sup>f</sup>BERT: Bidirectional Encoder Representations from Transformers.</p>
            </fn>
            <fn id="table5fn7">
              <p><sup>g</sup>RoBERTa: robustly optimized BERT approach.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Topic Performance Analyses</title>
        <p>The performance analyses for the individual topics show that our best model had a median value of 0.9000 for the P@20 metric (max=1.0000, min=0.3000), which demonstrates successful overall performance. However, as shown in <xref rid="figure5" ref-type="fig">Figure 5</xref>, for some topics, notably 11, 12, 19, 33, and 50, less than 50% of documents in the top 20 retrieved are relevant. For topics 11, 12, and 19, which searched for “coronavirus hospital rationing,” “coronavirus quarantine,” and “what alcohol sanitizer kills coronavirus” information, respectively, all our models have poor performance, and indeed, the combination of the different models in the pipeline managed to boost the results. On the other hand, for topics 33 and 50, which searched for “coronavirus vaccine candidates” and “mRNA vaccine coronavirus” information, respectively, it was the combination with the logistic regression model that lowered the performance (notice in <xref rid="figure5" ref-type="fig">Figure 5</xref> that model 4: bow + mlm + rrf has a significantly better performance compared to model 7 for those queries).</p>
        <p>The difference in performance per topic between our best model and the median of the submitted runs in round 5 for all teams for the P@20 metric is shown in <xref rid="figure6" ref-type="fig">Figure 6</xref>. Indeed, topics 11, 12, and 19 seemed hard for all the models participating in the TREC-COVID challenge to retrieve the correct documents. Even if our best model had poor performance for those topics, it still outperformed most of the runs submitted to the official evaluation. In particular, topic 19 had only 9 relevant or somewhat relevant documents in the official relevance judgments, which means that its max performance can be at most around 50% for the P@20 metric. For our worst performing topics compared to the other participants — topics 33 and 50 — better tuning between the ranking weights of the bag-of-words, masked language, and logistic regression models could have boosted the results.</p>
        <fig id="figure5" position="float">
          <label>Figure 5</label>
          <caption>
            <p>Per topic precision at rank 20 (P@20) in round 5 of TREC-COVID per each run. The baseline run1 and the best-performing run7, which benefits from neural language models, are highlighted with dashed lines. Note that for most topics, the transformer-based runs have significantly improved performance.</p>
          </caption>
          <graphic xlink:href="jmir_v23i9e30161_fig5.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure6" position="float">
          <label>Figure 6</label>
          <caption>
            <p>Per topic performance difference between our best model (model 7) and the median of all official submissions for the precision at document 20 (P@20) metric in round 5.</p>
          </caption>
          <graphic xlink:href="jmir_v23i9e30161_fig6.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Time-Dependent Relevance Analyses</title>
        <p>Given the dynamics of the COVID-19 pandemic, with a relatively well-defined starting period, a particularly effective technique to remove noise from the results, also adopted by some other participating teams [<xref ref-type="bibr" rid="ref22">22</xref>], is filtering documents based on their publication dates. For our first-stage retrieval models, we filtered out publications before December 2019 when the outbreak was first detected. This led to a small negative impact on recall but highly improved the precision of our models.</p>
        <p>To better understand how the document relevance varied over time, we analyzed the publication date of the official relevance judgments for the 5 rounds of TREC-COVID. As we can see in <xref rid="figure7" ref-type="fig">Figure 7</xref>, there is a clear exponential decay pattern in the number of relevant articles over time for all the rounds, with a faster decay in the first rounds and a longer tail for the later ones. We noticed that more recent publications closer to the round start, when the snapshot of the collection was created and queries were submitted, tended to have a higher probability of being relevant to the information need, with a half-life of around 20 days for round 1. This is somehow expected. First, as the documents found in previous query rounds were explored and are no longer relevant, only the most recent data are interesting, particularly in the gap between rounds. A second explanation is that in the case of a pandemic, new evidence arrives at an explosive rate, possibly refuting older knowledge.</p>
        <fig id="figure7" position="float">
          <label>Figure 7</label>
          <caption>
            <p>Distribution of the publication dates of the “highly relevant” articles for each of the TREC-COVID rounds.</p>
          </caption>
          <graphic xlink:href="jmir_v23i9e30161_fig7.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <p>To support effective search and discovery of COVID-19–related relevant literature in the COVID-19 infodemic, we explored the use of a multistage retrieval pipeline supported by bag-of-words models, masked language models, and classic learning-to-rank methods. The proposed methodology was evaluated in the context of the TREC-COVID challenge and achieved competitive results, being ranked in the top 4 of 126 runs among 28 teams participating in the challenge. The use of the multistage retrieval approach significantly improved the search results of COVID-related literature, leading to a gain in performance of 25.9% in terms of the NDCG@20 metric compared to a bag-of-words baseline. Particularly, the ensemble of masked language models brought the highest performance gain to the search pipeline. Indeed, ensembles of language models have proved to be a robust methodology to improve predictive performance [<xref ref-type="bibr" rid="ref53">53</xref>-<xref ref-type="bibr" rid="ref55">55</xref>].</p>
      <p>The COVID-19 pandemic has led to a huge amount of literature being published in the most diverse sources, including scientific journals, grey repositories, and white reports, among others. As the pandemic continues, the number of scientific publications grows at an unprecedented rate, causing an infodemic within many of the different disciplines involved [<xref ref-type="bibr" rid="ref3">3</xref>]. Finding the most relevant information sources to answer different information needs within the huge volume of data created has become of utmost necessity [<xref ref-type="bibr" rid="ref2">2</xref>]. By enabling the discovery of relevant information sources to complex user queries, effective retrieval models as proposed in this work may help to tackle the spread of misinformation. Such models empower experts with a minimal cost to search and discover information sources within a massive and fast-evolving corpus. Indeed, our model provides relevant information sources for more than 8 documents in the top-10 rank. Thus, continuous active search methods could be put in place to monitor and discover sources of evidence to certain query topics of relevant public health interest (eg, “coronavirus origin”) in a timely manner. This, in turn, would enable experts to analyze, identify, and curate both sources of the best evidence at the time and sources of misinformation. The former would foster the creation among others of living systematic reviews [<xref ref-type="bibr" rid="ref56">56</xref>,<xref ref-type="bibr" rid="ref57">57</xref>], which is one of the recommendations of the WHO to tackle the COVID infodemic [<xref ref-type="bibr" rid="ref2">2</xref>]. On the other hand, the latter could help fight, for example, the spread of scientific fake news by early retraction of misinforming articles, particularly in preprint servers, and thus limiting their exposition.</p>
      <p>Looking at the boost in performance of model 3 (mlm + rrf) alone, one could be tempted to argue that masked language models could be the main component in a retrieval system. However, 2 issues may arise: algorithmic complexity and search effectiveness. The former is related to the high complexity of masked language models (<italic>O(n<sup>2</sup> · h)</italic>, where <italic>n</italic> is the sentence length and <italic>h</italic> is the number of attention heads), which makes it prohibitive to classify a whole collection, often containing millions of documents, for every given query. The latter is related to the effectiveness of the individual models themselves. As shown in <xref ref-type="table" rid="table5">Table 5</xref>, individually, the performance of the language models is not significantly different from the baseline BM25 model. Thus, we believe it is the combination of models with different properties that can provide a successful search strategy in complex corpora, such as the one that originated from the COVID-19 infodemic.</p>
      <p>In terms of practical implications, by effectively processing natural language, the methodology proposed can help biomedical researchers and clinicians to find the COVID-19 papers that they need. The efficient literature discovery process fostered by our methods may lead to faster publication cycles when required, for example reducing from weeks to days the drafting time of COVID-19 reviews [<xref ref-type="bibr" rid="ref58">58</xref>], but also to less costly creation of curated living evidence portals, which will inform clinicians and public health officers with the best available evidence [<xref ref-type="bibr" rid="ref59">59</xref>]. Indeed, as shown in [<xref ref-type="bibr" rid="ref27">27</xref>,<xref ref-type="bibr" rid="ref60">60</xref>], these methodologies outperform commercially available tools for searching and discovering COVID-19–related literature. Moreover, as they are data-driven, it is expected that they can be extrapolated to other types of corpora, such as clinical trial protocols and biomedical metadata datasets [<xref ref-type="bibr" rid="ref60">60</xref>,<xref ref-type="bibr" rid="ref61">61</xref>], thus enabling a more comprehensive identification of scientific evidence. Equally important, as the COVID-19 infodemic is not the first and unlikely the last [<xref ref-type="bibr" rid="ref62">62</xref>,<xref ref-type="bibr" rid="ref63">63</xref>], our methodology and findings could be extended to help tackle future epi-, pan-, and infodemics by supporting relevant actors to scan large and fast-changing collections to create timely reviews and curated evidence and apply localized infodemic management approaches.</p>
      <p>With the rapid surge of published information and the variety of topics and sources related to COVID-19, it became hard for professionals dealing with the pandemic to find the correct information for their needs. While the automation discussed in this work can support more effective search and discovery, some high-level topics are still challenging. Indeed, some topics assessed in the TREC-COVID challenge were shown to be particularly hard for the retrieval models. For example, for topic 11, which searched for documents providing information on “guidelines for triaging patients infected with coronavirus,” our best model prioritized documents providing information about indicators for diagnosis (eg, “early recognition of coronavirus,” “RT-PCR testing of SARS-CoV-2 for hospitalized patients clinically diagnosed”). On the other hand, it missed documents including passages such as “telephone triage of patients with respiratory complaints.” Similarly, for topic 12, which searched information about the “best practices in hospitals and at home in maintaining quarantine,” our model prioritized documents providing information about “hospital preparedness” (eg, “improving preparedness for,” “preparedness among hospitals”) and missed documents containing information about “home-based exercise note in Covid-19 quarantine situation.”</p>
      <p>The methodology proposed has some limitations. First, it fails to explore transfer learning of learning-to-rank datasets. While the top-ranked teams all used multistage retrieval approaches, confirming the value of such methodology in modern retrieval models [<xref ref-type="bibr" rid="ref18">18</xref>,<xref ref-type="bibr" rid="ref23">23</xref>], the reranking strategy within the different pipelines varied slightly among the participants. For example, the top-ranked team used transfer learning from the MS MARCO learning-to-rank dataset and from a zero-shot learning approach. Other teams in the top 3 used transfer learning from a silver collection, based on the known item search technique [<xref ref-type="bibr" rid="ref64">64</xref>]. Second, while we explored the combination of different topic items to build our queries, we failed to work on the document indexing unit, leaving all the normalization work to the probabilistic weighting models. As the COVID-19 literature comes from heterogeneous collections, containing sometimes only title and sometimes large full text, even with good finetuning of the model parameters, such variation in size and content poses a challenge to the first-stage retrieval model. Indeed, some strategies that explored decomposing the indexing unit into small structures, such as sentences and paragraphs, have achieved more competitive results [<xref ref-type="bibr" rid="ref23">23</xref>].</p>
      <p>Another limitation of our work was the ability to explore the freshness of the corpus. The TREC-COVID challenge dynamics, running throughout a sequence of rounds with new incremental search topics added on each round, provides an interesting setting for evaluating retrieval models in an infodemic context. It simulates typical search and discovery workflows, in which evolving queries are posed against an evolving body of knowledge over time, and already discovered documents in previous searches are no longer relevant [<xref ref-type="bibr" rid="ref65">65</xref>,<xref ref-type="bibr" rid="ref66">66</xref>]. A successful strategy in this case is to filter out results according to a cut-off date, thus reducing noise in the retrieval set. However, in retrospect, we noticed that another useful technique, which is very natural to an infodemic case, could be to decay the score of publications by their distance to the present time or explore their recency or freshness [<xref ref-type="bibr" rid="ref67">67</xref>,<xref ref-type="bibr" rid="ref68">68</xref>], as highlighted in <xref rid="figure7" ref-type="fig">Figure 7</xref>, rather than a hard cut-off (ie, December 2019 in our case) for all the rounds. We leave exploring such a strategy as future work.</p>
      <p>To conclude, we believe our information retrieval pipeline can provide a potential solution to help researchers, decision makers, and medical doctors, among others, search and find the correct information in the unique situation caused by the COVID-19 pandemic. We detailed the different components of this pipeline, including the traditional index-based information retrieval methods and the modern NLP-based neural network models, as well as insights and practical recipes to increase the quality of information retrieval of scientific publications targeted to the case of an infodemic. We grounded our results in the TREC-COVID challenge, where around 50 different teams participated in the 5 rounds of the competition. We showed very competitive results as judged by the official leaderboard of the challenge. Apart from the COVID-19 case, we believe our solutions can also be useful for other potential future infodemics.</p>
    </sec>
  </body>
  <back>
    <app-group/>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">BERT</term>
          <def>
            <p>Bidirectional Encoder Representations from Transformers</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">BM25</term>
          <def>
            <p>Okapi Best Match 25</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">Bpref</term>
          <def>
            <p>binary preference</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">CORD-19</term>
          <def>
            <p>COVID-19 Open Research Dataset</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">DFR</term>
          <def>
            <p>divergence from randomness</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">LMD</term>
          <def>
            <p>language model Dirichlet</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">MAP</term>
          <def>
            <p>mean average precision</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">MERS</term>
          <def>
            <p>Middle East respiratory syndrome</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">MS MARCO</term>
          <def>
            <p>Microsoft Machine Reading Comprehension</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb10">NDCG</term>
          <def>
            <p>normalized discounted cumulative gain</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb11">NLP</term>
          <def>
            <p>natural language processing</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb12">PMC</term>
          <def>
            <p>PubMed Central</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb13">RoBERTa</term>
          <def>
            <p>robustly optimized BERT approach</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb14">RRF</term>
          <def>
            <p>reciprocal rank fusion</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb15">SARS</term>
          <def>
            <p>severe acute respiratory syndrome</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb16">tf-idf</term>
          <def>
            <p>term frequency-inverse document frequency</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb17">WHO</term>
          <def>
            <p>World Health Organization</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>The study received funding from Innosuisse project funding number 41013.1 IP-ICT. CINECA has received funding from the European Union’s Horizon 2020 research and innovation programme under grant agreement No 825775.</p>
    </ack>
    <fn-group>
      <fn fn-type="con">
        <p>DT conceived the experiments. DT and SF conducted the experiments. DT, SF, EK, and PA analyzed the results. NB, EK, DVA, and PA prepared the data. SF, DT, JC, RG, and NN drafted the manuscript. All authors reviewed the manuscript.</p>
      </fn>
      <fn fn-type="conflict">
        <p>PA and NB work for Risklick AG. The other authors declare no competing interests.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Haghani</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Bliemer</surname>
              <given-names>MCJ</given-names>
            </name>
            <name name-style="western">
              <surname>Goerlandt</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>The scientific literature on Coronaviruses, COVID-19 and its associated safety-related research dimensions: A scientometric analysis and scoping review</article-title>
          <source>Saf Sci</source>
          <year>2020</year>
          <month>09</month>
          <volume>129</volume>
          <fpage>104806</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/32382213"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.ssci.2020.104806</pub-id>
          <pub-id pub-id-type="medline">32382213</pub-id>
          <pub-id pub-id-type="pii">S0925-7535(20)30203-4</pub-id>
          <pub-id pub-id-type="pmcid">PMC7203062</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tangcharoensathien</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Calleja</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Nguyen</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Purnat</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>D'Agostino</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Garcia-Saiso</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Landry</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Rashidian</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Hamilton</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>AbdAllah</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Ghiga</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Hill</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Hougendobler</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>van Andel</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Nunn</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Brooks</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Sacco</surname>
              <given-names>PL</given-names>
            </name>
            <name name-style="western">
              <surname>De Domenico</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Mai</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Gruzd</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Alaphilippe</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Briand</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Framework for Managing the COVID-19 Infodemic: Methods and Results of an Online, Crowdsourced WHO Technical Consultation</article-title>
          <source>J Med Internet Res</source>
          <year>2020</year>
          <month>06</month>
          <day>26</day>
          <volume>22</volume>
          <issue>6</issue>
          <fpage>e19659</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2020/6/e19659/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/19659</pub-id>
          <pub-id pub-id-type="medline">32558655</pub-id>
          <pub-id pub-id-type="pii">v22i6e19659</pub-id>
          <pub-id pub-id-type="pmcid">PMC7332158</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Eysenbach</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>How to Fight an Infodemic: The Four Pillars of Infodemic Management</article-title>
          <source>J Med Internet Res</source>
          <year>2020</year>
          <month>06</month>
          <day>29</day>
          <volume>22</volume>
          <issue>6</issue>
          <fpage>e21820</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2020/6/e21820/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/21820</pub-id>
          <pub-id pub-id-type="medline">32589589</pub-id>
          <pub-id pub-id-type="pii">v22i6e21820</pub-id>
          <pub-id pub-id-type="pmcid">PMC7332253</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Eysenbach</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Infodemiology: The epidemiology of (mis)information</article-title>
          <source>Am J Med</source>
          <year>2002</year>
          <month>12</month>
          <day>15</day>
          <volume>113</volume>
          <issue>9</issue>
          <fpage>763</fpage>
          <lpage>5</lpage>
          <pub-id pub-id-type="doi">10.1016/s0002-9343(02)01473-0</pub-id>
          <pub-id pub-id-type="medline">12517369</pub-id>
          <pub-id pub-id-type="pii">S0002934302014730</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kristensen</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Lorenz</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>May</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Strauss</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Exploring the use of web searches for risk communication during COVID-19 in Germany</article-title>
          <source>Sci Rep</source>
          <year>2021</year>
          <month>03</month>
          <day>19</day>
          <volume>11</volume>
          <issue>1</issue>
          <fpage>6419</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41598-021-85873-4"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41598-021-85873-4</pub-id>
          <pub-id pub-id-type="medline">33742054</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41598-021-85873-4</pub-id>
          <pub-id pub-id-type="pmcid">PMC7979881</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Palayew</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Norgaard</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Safreed-Harmon</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Andersen</surname>
              <given-names>TH</given-names>
            </name>
            <name name-style="western">
              <surname>Rasmussen</surname>
              <given-names>LN</given-names>
            </name>
            <name name-style="western">
              <surname>Lazarus</surname>
              <given-names>JV</given-names>
            </name>
          </person-group>
          <article-title>Pandemic publishing poses a new COVID-19 challenge</article-title>
          <source>Nat Hum Behav</source>
          <year>2020</year>
          <month>07</month>
          <volume>4</volume>
          <issue>7</issue>
          <fpage>666</fpage>
          <lpage>669</lpage>
          <pub-id pub-id-type="doi">10.1038/s41562-020-0911-0</pub-id>
          <pub-id pub-id-type="medline">32576981</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41562-020-0911-0</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cheerkoot-Jalim</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Khedo</surname>
              <given-names>KK</given-names>
            </name>
          </person-group>
          <article-title>A systematic review of text mining approaches applied to various application areas in the biomedical domain</article-title>
          <source>JKM</source>
          <year>2020</year>
          <month>12</month>
          <day>21</day>
          <volume>25</volume>
          <issue>3</issue>
          <fpage>642</fpage>
          <lpage>668</lpage>
          <pub-id pub-id-type="doi">10.1108/jkm-09-2019-0524</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Massey</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Cohen</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Oren</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Moed</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Matzner</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Mahajan</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Caraballo</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Kumar</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Xue</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Ding</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Dreyer</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Roy</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Krumholz</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Engagement With COVID-19 Public Health Measures in the United States: A Cross-sectional Social Media Analysis from June to November 2020</article-title>
          <source>J Med Internet Res</source>
          <year>2021</year>
          <month>06</month>
          <day>21</day>
          <volume>23</volume>
          <issue>6</issue>
          <fpage>e26655</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2021/6/e26655/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/26655</pub-id>
          <pub-id pub-id-type="medline">34086593</pub-id>
          <pub-id pub-id-type="pii">v23i6e26655</pub-id>
          <pub-id pub-id-type="pmcid">PMC8218897</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wardle</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Derakhshan</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Thinking about ‘information disorder’: formats of misinformation, disinformation, and mal-information</article-title>
          <source>Journalism, ‘Fake News’ &#38; Disinformation</source>
          <year>2018</year>
          <access-date>2021-08-26</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://en.unesco.org/sites/default/files/f._jfnd_handbook_module_2.pdf">https://en.unesco.org/sites/default/files/f._jfnd_handbook_module_2.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Barua</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Barua</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Aktar</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kabir</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Effects of misinformation on COVID-19 individual responses and recommendations for resilience of disastrous consequences of misinformation</article-title>
          <source>Prog Disaster Sci</source>
          <year>2020</year>
          <month>12</month>
          <volume>8</volume>
          <fpage>100119</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S2590-0617(20)30056-9"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.pdisas.2020.100119</pub-id>
          <pub-id pub-id-type="medline">34173443</pub-id>
          <pub-id pub-id-type="pii">S2590-0617(20)30056-9</pub-id>
          <pub-id pub-id-type="pmcid">PMC7373041</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>West</surname>
              <given-names>JD</given-names>
            </name>
            <name name-style="western">
              <surname>Bergstrom</surname>
              <given-names>CT</given-names>
            </name>
          </person-group>
          <article-title>Misinformation in and about science</article-title>
          <source>Proc Natl Acad Sci U S A</source>
          <year>2021</year>
          <month>04</month>
          <day>13</day>
          <volume>118</volume>
          <issue>15</issue>
          <fpage>1</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/33837146"/>
          </comment>
          <pub-id pub-id-type="doi">10.1073/pnas.1912444117</pub-id>
          <pub-id pub-id-type="medline">33837146</pub-id>
          <pub-id pub-id-type="pii">1912444117</pub-id>
          <pub-id pub-id-type="pmcid">PMC8054004</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zarocostas</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>How to fight an infodemic</article-title>
          <source>Lancet</source>
          <year>2020</year>
          <month>02</month>
          <day>29</day>
          <volume>395</volume>
          <issue>10225</issue>
          <fpage>676</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/32113495"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/S0140-6736(20)30461-X</pub-id>
          <pub-id pub-id-type="medline">32113495</pub-id>
          <pub-id pub-id-type="pii">S0140-6736(20)30461-X</pub-id>
          <pub-id pub-id-type="pmcid">PMC7133615</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>LL</given-names>
            </name>
            <name name-style="western">
              <surname>Lo</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Chandrasekhar</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Reas</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Burdick</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Eide</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Funk</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Katsis</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Kinney</surname>
              <given-names>RM</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Merrill</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Mooney</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Murdick</surname>
              <given-names>DA</given-names>
            </name>
            <name name-style="western">
              <surname>Rishi</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Sheehan</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Shen</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Stilson</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Wade</surname>
              <given-names>AD</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>NXR</given-names>
            </name>
            <name name-style="western">
              <surname>Wilhelm</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Xie</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Raymond</surname>
              <given-names>DM</given-names>
            </name>
            <name name-style="western">
              <surname>Weld</surname>
              <given-names>DS</given-names>
            </name>
            <name name-style="western">
              <surname>Etzioni</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Kohlmeier</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>CORD-19: The COVID-19 Open Research Dataset</article-title>
          <source>Proceedings of the 1st Workshop on NLP for COVID-19 at ACL 2020</source>
          <year>2020</year>
          <fpage>1</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/2020.nlpcovid19-acl.1"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Yi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Jeong</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Sung</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Yoon</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Choi</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Ko</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Kang</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Answering Questions on COVID-19 in Real-Time</article-title>
          <source>Proceedings of the 1st Workshop on NLP for COVID-19 (Part 2) at EMNLP 2020</source>
          <year>2020</year>
          <fpage>1</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/2020.nlpcovid19-2.1/"/>
          </comment>
          <pub-id pub-id-type="doi">10.18653/v1/2020.nlpcovid19-2.1</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="web">
          <article-title>deepset / covid_bert_base</article-title>
          <source>Hugging Face</source>
          <access-date>2021-08-26</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://huggingface.co/deepset/covid_bert_base">https://huggingface.co/deepset/covid_bert_base</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="web">
          <article-title>COVID-19 Open Research Dataset Challenge (CORD-19)</article-title>
          <source>Kaggle</source>
          <access-date>2021-08-26</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.kaggle.com/allen-institute-for-ai/CORD-19-research-challenge">https://www.kaggle.com/allen-institute-for-ai/CORD-19-research-challenge</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="web">
          <article-title>TREC-COVID</article-title>
          <source>National Institutes of Standards and Technology</source>
          <access-date>2021-08-26</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://ir.nist.gov/covidSubmit/index.html">https://ir.nist.gov/covidSubmit/index.html</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Roberts</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Alam</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Bedrick</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Demner-Fushman</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Lo</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Soboroff</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Voorhees</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>LL</given-names>
            </name>
            <name name-style="western">
              <surname>Hersh</surname>
              <given-names>WR</given-names>
            </name>
          </person-group>
          <article-title>Searching for scientific evidence in a pandemic: An overview of TREC-COVID</article-title>
          <source>J Biomed Inform</source>
          <year>2021</year>
          <month>07</month>
          <day>08</day>
          <volume>121</volume>
          <fpage>103865</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/34245913"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2021.103865</pub-id>
          <pub-id pub-id-type="medline">34245913</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(21)00194-5</pub-id>
          <pub-id pub-id-type="pmcid">PMC8264272</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Roberts</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Alam</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Bedrick</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Demner-Fushman</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Lo</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Soboroff</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Voorhees</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>LL</given-names>
            </name>
            <name name-style="western">
              <surname>Hersh</surname>
              <given-names>WR</given-names>
            </name>
          </person-group>
          <article-title>TREC-COVID: rationale and structure of an information retrieval shared task for COVID-19</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2020</year>
          <month>07</month>
          <day>01</day>
          <volume>27</volume>
          <issue>9</issue>
          <fpage>1431</fpage>
          <lpage>1436</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/32365190"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamia/ocaa091</pub-id>
          <pub-id pub-id-type="medline">32365190</pub-id>
          <pub-id pub-id-type="pii">5828938</pub-id>
          <pub-id pub-id-type="pmcid">PMC7239098</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Voorhees</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Alam</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Bedrick</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Demner-Fushman</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Hersh</surname>
              <given-names>WR</given-names>
            </name>
            <name name-style="western">
              <surname>Lo</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Roberts</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Soboroff</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>LL</given-names>
            </name>
          </person-group>
          <article-title>TREC-COVID: Constructing a Pandemic Information Retrieval Test Collection</article-title>
          <source>SIGIR Forum</source>
          <year>2020</year>
          <month>06</month>
          <volume>54</volume>
          <issue>1</issue>
          <fpage>1</fpage>
          <lpage>12</lpage>
          <pub-id pub-id-type="doi">10.1145/3451964.3451965</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tonon</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Demartini</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Cudré-Mauroux</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Pooling-based continuous evaluation of information retrieval systems</article-title>
          <source>Inf Retrieval J</source>
          <year>2015</year>
          <month>09</month>
          <day>08</day>
          <volume>18</volume>
          <issue>5</issue>
          <fpage>445</fpage>
          <lpage>472</lpage>
          <pub-id pub-id-type="doi">10.1007/s10791-015-9266-y</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>MacAvaney</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Cohan</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Goharian</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>SLEDGE: A Simple Yet Effective Baseline for COVID-19 Scientific Knowledge Search</article-title>
          <source>Cornell University</source>
          <year>2020</year>
          <month>05</month>
          <day>05</day>
          <access-date>2021-08-26</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://arxiv.org/abs/2005.02365">http://arxiv.org/abs/2005.02365</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Gupta</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Tang</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Han</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Pradeep</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Nogueira</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Cho</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Fang</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Lim</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Covidex: Neural Ranking Models and Keyword Search Infrastructure for the COVID-19 Open Research Dataset</article-title>
          <source>Proceedings of the First Workshop on Scholarly Document Processing</source>
          <year>2020</year>
          <fpage>31</fpage>
          <lpage>41</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/2020.sdp-1.5"/>
          </comment>
          <pub-id pub-id-type="doi">10.18653/v1/2020.sdp-1.5</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Esteva</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Kale</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Paulus</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Hashimoto</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Yin</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Radev</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Socher</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>COVID-19 information retrieval with deep-learning based semantic search, question answering, and abstractive summarization</article-title>
          <source>NPJ Digit Med</source>
          <year>2021</year>
          <month>04</month>
          <day>12</day>
          <volume>4</volume>
          <issue>1</issue>
          <fpage>68</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41746-021-00437-0"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41746-021-00437-0</pub-id>
          <pub-id pub-id-type="medline">33846532</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41746-021-00437-0</pub-id>
          <pub-id pub-id-type="pmcid">PMC8041998</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nguyen</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Rybinski</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Karimi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Xing</surname>
              <given-names>Z</given-names>
            </name>
          </person-group>
          <article-title>Searching Scientific Literature for Answers on COVID-19 Questions</article-title>
          <source>Cornell University</source>
          <year>2020</year>
          <access-date>2021-08-26</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://arxiv.org/abs/2007.02492">http://arxiv.org/abs/2007.02492</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Li</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Yates</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>MacAvaney</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>He</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>PARADE: Passage Representation Aggregation for Document Reranking</article-title>
          <source>Cornell University</source>
          <year>2021</year>
          <access-date>2021-08-26</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://arxiv.org/abs/2008.09093">http://arxiv.org/abs/2008.09093</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Soni</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Roberts</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>An evaluation of two commercial deep learning-based information retrieval systems for COVID-19 literature</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2021</year>
          <month>01</month>
          <day>15</day>
          <volume>28</volume>
          <issue>1</issue>
          <fpage>132</fpage>
          <lpage>137</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/33197268"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamia/ocaa271</pub-id>
          <pub-id pub-id-type="medline">33197268</pub-id>
          <pub-id pub-id-type="pii">5983742</pub-id>
          <pub-id pub-id-type="pmcid">PMC7717324</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Robertson</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Zaragoza</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>The Probabilistic Relevance Framework: BM25 and Beyond</article-title>
          <source>FNT in Information Retrieval</source>
          <year>2009</year>
          <volume>3</volume>
          <issue>4</issue>
          <fpage>333</fpage>
          <lpage>389</lpage>
          <pub-id pub-id-type="doi">10.1561/1500000019</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lafferty</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Zhai</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Document language models, query models, and risk minimization for information retrieval</article-title>
          <year>2001</year>
          <conf-name>24th annual international ACM SIGIR conference on Research and development in information retrieval</conf-name>
          <conf-date>2001</conf-date>
          <conf-loc>New Orleans, LA</conf-loc>
          <pub-id pub-id-type="doi">10.1145/383952.383970</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Joachims</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Zhai</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Learning to rank for information retrieval (LR4IR 2007)</article-title>
          <source>SIGIR Forum</source>
          <year>2007</year>
          <month>12</month>
          <volume>41</volume>
          <issue>2</issue>
          <fpage>58</fpage>
          <lpage>62</lpage>
          <pub-id pub-id-type="doi">10.1145/1328964.1328974</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Burges</surname>
              <given-names>CJC</given-names>
            </name>
          </person-group>
          <article-title>From RankNet to LambdaRank to LambdaMART: An Overview</article-title>
          <source>Microsoft</source>
          <year>2010</year>
          <access-date>2021-08-26</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.microsoft.com/en-us/research/uploads/prod/2016/02/MSR-TR-2010-82.pdf">https://www.microsoft.com/en-us/research/uploads/prod/2016/02/MSR-TR-2010-82.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Craswell</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Mitra</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Yilmaz</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Campos</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Voorhees</surname>
              <given-names>EM</given-names>
            </name>
          </person-group>
          <article-title>Overview of the TREC 2019 deep learning track</article-title>
          <source>Cornell University</source>
          <year>2020</year>
          <access-date>2021-08-26</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2003.07820">https://arxiv.org/abs/2003.07820</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Faessler</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Oleynik</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Hahn</surname>
              <given-names>U</given-names>
            </name>
          </person-group>
          <article-title>JULIE Lab &amp; Med Uni Graz @ TREC 2019 Precision Medicine Track</article-title>
          <source>TREC</source>
          <year>2019</year>
          <access-date>2021-08-26</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://trec.nist.gov/pubs/trec28/papers/julie-mug.PM.pdf">https://trec.nist.gov/pubs/trec28/papers/julie-mug.PM.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Qin</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>LETOR: A benchmark collection for research on learning to rank for information retrieval</article-title>
          <source>Inf Retrieval</source>
          <year>2010</year>
          <month>1</month>
          <day>1</day>
          <volume>13</volume>
          <issue>4</issue>
          <fpage>346</fpage>
          <lpage>374</lpage>
          <pub-id pub-id-type="doi">10.1007/s10791-009-9123-y</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nguyen</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Rosenberg</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Song</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Gao</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Tiwary</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Majumder</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Deng</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>MS MARCO: A human generated machine reading comprehension dataset</article-title>
          <source>Microsoft</source>
          <year>2016</year>
          <month>11</month>
          <access-date>2021-08-26</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.microsoft.com/en-us/research/publication/ms-marco-human-generated-machine-reading-comprehension-dataset/">https://www.microsoft.com/en-us/research/publication/ms-marco-human-generated-machine-reading-comprehension-dataset/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cao</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Qin</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>TY</given-names>
            </name>
            <name name-style="western">
              <surname>Tsai</surname>
              <given-names>MF</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Learning to rank: from pairwise approach to listwise approach</article-title>
          <year>2007</year>
          <conf-name>24th international conference on Machine learning</conf-name>
          <conf-date>June 20-24, 2007</conf-date>
          <conf-loc>Corvalis, OR</conf-loc>
          <pub-id pub-id-type="doi">10.1145/1273496.1273513</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Li</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Burges</surname>
              <given-names>CJ</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>Q</given-names>
            </name>
          </person-group>
          <article-title>McRank: Learning to rank using multiple classification and gradient boosting</article-title>
          <source>NeurIPS Proceedings</source>
          <year>2007</year>
          <access-date>2021-08-26</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://papers.nips.cc/paper/2007/file/b86e8d03fe992d1b0e19656875ee557c-Paper.pdf">https://papers.nips.cc/paper/2007/file/b86e8d03fe992d1b0e19656875ee557c-Paper.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Devlin</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>MW</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Toutanova</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding</article-title>
          <source>Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers)</source>
          <year>2019</year>
          <fpage>4171</fpage>
          <lpage>4186</lpage>
          <pub-id pub-id-type="doi">10.18653/v1/N19-1423</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yilmaz</surname>
              <given-names>ZA</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Applying BERT to Document Retrieval with Birch</article-title>
          <source>Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP): System Demonstrations</source>
          <year>2019</year>
          <fpage>19</fpage>
          <lpage>24</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/D19-3004/"/>
          </comment>
          <pub-id pub-id-type="doi">10.18653/v1/d19-3004</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Vaswani</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Shazeer</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Parmar</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Uszkoreit</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Jones</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Gomez</surname>
              <given-names>AN</given-names>
            </name>
            <name name-style="western">
              <surname>Kaiser</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Polosukhin</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>Attention is all you need</article-title>
          <source>Cornell University</source>
          <year>2017</year>
          <access-date>2021-08-26</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1706.03762">https://arxiv.org/abs/1706.03762</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Ott</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Goyal</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Du</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Joshi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Levy</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Lewis</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Zettlemoyer</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Stoyanov</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>RoBERTa: A robustly optimized BERT pretraining approach</article-title>
          <source>Cornell University</source>
          <year>2019</year>
          <access-date>2021-08-26</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1907.11692">https://arxiv.org/abs/1907.11692</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Dai</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Carbonell</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Salakhutdinov</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Le</surname>
              <given-names>QV</given-names>
            </name>
          </person-group>
          <article-title>XLNet: Generalized autoregressive pretraining for language understanding</article-title>
          <source>Cornell University</source>
          <year>2019</year>
          <access-date>2021-08-26</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1906.08237">https://arxiv.org/abs/1906.08237</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Beltagy</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Lo</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Cohan</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>SciBERT: A Pretrained Language Model for Scientific Text</article-title>
          <source>Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP)</source>
          <year>2019</year>
          <fpage>3615</fpage>
          <lpage>3620</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/D19-1371/"/>
          </comment>
          <pub-id pub-id-type="doi">10.18653/v1/d19-1371</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref44">
        <label>44</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Reimers</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Gurevych</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks</article-title>
          <source>Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP)</source>
          <year>2019</year>
          <fpage>3982</fpage>
          <lpage>3992</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/D19-1410/"/>
          </comment>
          <pub-id pub-id-type="doi">10.18653/v1/d19-1410</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref45">
        <label>45</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Fang</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Anserini: Enabling the Use of Lucene for Information Retrieval Research</article-title>
          <source>SIGIR '17: Proceedings of the 40th International ACM SIGIR Conference on Research and Development in Information Retrieval</source>
          <year>2017</year>
          <fpage>1253</fpage>
          <lpage>1256</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dl.acm.org/doi/10.1145/3077136.3080721"/>
          </comment>
          <pub-id pub-id-type="doi">10.1145/3077136.3080721</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref46">
        <label>46</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Robertson</surname>
              <given-names>SE</given-names>
            </name>
            <name name-style="western">
              <surname>Jones</surname>
              <given-names>KS</given-names>
            </name>
          </person-group>
          <article-title>Relevance weighting of search terms</article-title>
          <source>J. Am. Soc. Inf. Sci</source>
          <year>1976</year>
          <month>05</month>
          <volume>27</volume>
          <issue>3</issue>
          <fpage>129</fpage>
          <lpage>146</lpage>
          <pub-id pub-id-type="doi">10.1002/asi.4630270302</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref47">
        <label>47</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Amati</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Van Rijsbergen</surname>
              <given-names>CJ</given-names>
            </name>
          </person-group>
          <article-title>Probabilistic models of information retrieval based on measuring the divergence from randomness</article-title>
          <source>ACM Trans. Inf. Syst</source>
          <year>2002</year>
          <month>10</month>
          <volume>20</volume>
          <issue>4</issue>
          <fpage>357</fpage>
          <lpage>389</lpage>
          <pub-id pub-id-type="doi">10.1145/582415.582416</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref48">
        <label>48</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhai</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Lafferty</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>A Study of Smoothing Methods for Language Models Applied to Ad Hoc Information Retrieval</article-title>
          <source>SIGIR Forum</source>
          <year>2017</year>
          <month>08</month>
          <day>02</day>
          <volume>51</volume>
          <issue>2</issue>
          <fpage>268</fpage>
          <lpage>276</lpage>
          <pub-id pub-id-type="doi">10.1145/3130348.3130377</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref49">
        <label>49</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hewitt</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Manning</surname>
              <given-names>CD</given-names>
            </name>
          </person-group>
          <article-title>A Structural Probe for Finding Syntax in Word Representations</article-title>
          <source>Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers)</source>
          <year>2019</year>
          <fpage>4129</fpage>
          <lpage>4138</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/N19-1419/"/>
          </comment>
          <pub-id pub-id-type="doi">10.18653/v1/N19-1419</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref50">
        <label>50</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yenicelik</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Schmidt</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Kilcher</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>How does BERT capture semantics? A closer look at polysemous words</article-title>
          <source>Proceedings of the Third BlackboxNLP Workshop on Analyzing and Interpreting Neural Networks for NLP</source>
          <year>2020</year>
          <fpage>156</fpage>
          <lpage>162</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/2020.blackboxnlp-1.15/"/>
          </comment>
          <pub-id pub-id-type="doi">10.18653/v1/2020.blackboxnlp-1.15</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref51">
        <label>51</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kingma</surname>
              <given-names>DP</given-names>
            </name>
            <name name-style="western">
              <surname>Ba</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Adam: A method for stochastic optimization</article-title>
          <source>Cornell University</source>
          <year>2014</year>
          <access-date>2021-08-26</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1412.6980">https://arxiv.org/abs/1412.6980</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref52">
        <label>52</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cormack</surname>
              <given-names>GV</given-names>
            </name>
            <name name-style="western">
              <surname>Clarke</surname>
              <given-names>CLA</given-names>
            </name>
            <name name-style="western">
              <surname>Buettcher</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Reciprocal rank fusion outperforms condorcet and individual rank learning methods</article-title>
          <source>SIGIR '09: Proceedings of the 32nd international ACM SIGIR conference on Research and development in information retrieval</source>
          <year>2009</year>
          <fpage>758</fpage>
          <lpage>759</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dl.acm.org/doi/10.1145/1571941.1572114"/>
          </comment>
          <pub-id pub-id-type="doi">10.1145/1571941.1572114</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref53">
        <label>53</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Knafou</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Naderi</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Copara</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Teodoro</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Ruch</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>BiTeM at WNUT 2020 Shared Task-1: Named Entity Recognition over Wet Lab Protocols using an Ensemble of Contextual Language Models</article-title>
          <source>Proceedings of the Sixth Workshop on Noisy User-generated Text (W-NUT 2020)</source>
          <year>2020</year>
          <fpage>305</fpage>
          <lpage>313</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/2020.wnut-1.40/"/>
          </comment>
          <pub-id pub-id-type="doi">10.18653/v1/2020.wnut-1.40</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref54">
        <label>54</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Copara</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Knafou</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Naderi</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Moro</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Ruch</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Teodoro</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Contextualized French Language Models for Biomedical Named Entity Recognition</article-title>
          <source>Actes de la 6e conférence conjointe Journées d'Études sur la Parole (JEP, 33e édition), Traitement Automatique des Langues Naturelles (TALN, 27e édition), Rencontre des Étudiants Chercheurs en Informatique pour le Traitement Automatique des Langues (RÉCITAL, 22e édition). Atelier DÉfi Fouille de Textes</source>
          <year>2020</year>
          <fpage>36</fpage>
          <lpage>48</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/2020.jeptalnrecital-deft.4"/>
          </comment>
          <!-- NOTE(review): removed <pub-id pub-id-type="doi">10.18653/v1/2020.wnut-1.40</pub-id>
               that DOI belongs to the WNUT 2020 paper in ref53 and was duplicated here by
               copy-paste; the JEP/TALN/DEFT 2020 paper at 2020.jeptalnrecital-deft.4 has no
               such DOI - verify and restore the correct identifier if one exists -->
        </nlm-citation>
      </ref>
      <ref id="ref55">
        <label>55</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Copara</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Naderi</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Knafou</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Ruch</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Teodoro</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Named Entity Recognition in Chemical Patents using Ensemble of Contextual Language Models</article-title>
          <year>2020</year>
          <conf-name>Work Notes CLEF 2020 - Conf Labs Eval Forum</conf-name>
          <conf-date>September 2020</conf-date>
          <conf-loc>Thessaloniki, Greece</conf-loc>
          <fpage>22</fpage>
          <lpage>25</lpage>
          <!-- NOTE(review): removed <pub-id pub-id-type="doi">10.18653/v1/2020.wnut-1.40</pub-id>
               that DOI belongs to the WNUT 2020 paper in ref53, not to these CLEF 2020
               working notes (CEUR working-notes papers are not assigned ACL Anthology DOIs) -->
        </nlm-citation>
      </ref>
      <ref id="ref56">
        <label>56</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Siemieniuk</surname>
              <given-names>RA</given-names>
            </name>
            <name name-style="western">
              <surname>Bartoszko</surname>
              <given-names>JJ</given-names>
            </name>
            <name name-style="western">
              <surname>Ge</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Zeraatkar</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Izcovich</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Kum</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Pardo-Hernandez</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Qasim</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Martinez</surname>
              <given-names>JPD</given-names>
            </name>
            <name name-style="western">
              <surname>Rochwerg</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Lamontagne</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Han</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Agarwal</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Agoritsas</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Chu</surname>
              <given-names>DK</given-names>
            </name>
            <name name-style="western">
              <surname>Couban</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Cusano</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Darzi</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Devji</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Fang</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Fang</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Flottorp</surname>
              <given-names>SA</given-names>
            </name>
            <name name-style="western">
              <surname>Foroutan</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Ghadimi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Heels-Ansdell</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Honarmand</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Hou</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Hou</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Ibrahim</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Khamis</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Lam</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Loeb</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Marcucci</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>McLeod</surname>
              <given-names>SL</given-names>
            </name>
            <name name-style="western">
              <surname>Motaghi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Murthy</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Mustafa</surname>
              <given-names>RA</given-names>
            </name>
            <name name-style="western">
              <surname>Neary</surname>
              <given-names>JD</given-names>
            </name>
            <name name-style="western">
              <surname>Rada</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Riaz</surname>
              <given-names>IB</given-names>
            </name>
            <name name-style="western">
              <surname>Sadeghirad</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Sekercioglu</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Sheng</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Sreekanta</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Switzer</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Tendal</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Thabane</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Tomlinson</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Turner</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Vandvik</surname>
              <given-names>PO</given-names>
            </name>
            <name name-style="western">
              <surname>Vernooij</surname>
              <given-names>RW</given-names>
            </name>
            <name name-style="western">
              <surname>Viteri-García</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Yao</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Ye</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Guyatt</surname>
              <given-names>GH</given-names>
            </name>
            <name name-style="western">
              <surname>Brignardello-Petersen</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Drug treatments for covid-19: living systematic review and network meta-analysis</article-title>
          <source>BMJ</source>
          <year>2020</year>
          <month>07</month>
          <day>30</day>
          <volume>370</volume>
          <fpage>m2980</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.bmj.com/lookup/pmidlookup?view=long&#38;pmid=32732190"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/bmj.m2980</pub-id>
          <pub-id pub-id-type="medline">32732190</pub-id>
          <pub-id pub-id-type="pmcid">PMC7390912</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref57">
        <label>57</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Buitrago-Garcia</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Egli-Gany</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Counotte</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Hossmann</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Imeri</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Ipekci</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Salanti</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Low</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>Occurrence and transmission potential of asymptomatic and presymptomatic SARS-CoV-2 infections: A living systematic review and meta-analysis</article-title>
          <source>PLoS Med</source>
          <year>2020</year>
          <month>09</month>
          <volume>17</volume>
          <issue>9</issue>
          <fpage>e1003346</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pmed.1003346"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pmed.1003346</pub-id>
          <pub-id pub-id-type="medline">32960881</pub-id>
          <pub-id pub-id-type="pii">PMEDICINE-D-20-02690</pub-id>
          <pub-id pub-id-type="pmcid">PMC7508369</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref58">
        <label>58</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hutson</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Artificial-intelligence tools aim to tame the coronavirus literature</article-title>
          <source>Nature</source>
          <year>2020</year>
          <month>06</month>
          <day>09</day>
          <fpage>1</fpage>
          <pub-id pub-id-type="doi">10.1038/d41586-020-01733-7</pub-id>
          <pub-id pub-id-type="medline">34103725</pub-id>
          <pub-id pub-id-type="pii">10.1038/d41586-020-01733-7</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref59">
        <label>59</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Vogel</surname>
              <given-names>JP</given-names>
            </name>
            <name name-style="western">
              <surname>Tendal</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Giles</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Whitehead</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Burton</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Chakraborty</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Cheyne</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Downton</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Fraile Navarro</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Gleeson</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Gordon</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Hunt</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Kitschke</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>McDonald</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>McDonnell</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Middleton</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Millard</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Murano</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Oats</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Tate</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>White</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Elliott</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Roach</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Homer</surname>
              <given-names>CSE</given-names>
            </name>
            <collab>National COVID-19 Clinical Evidence Taskforce</collab>
          </person-group>
          <article-title>Clinical care of pregnant and postpartum women with COVID-19: Living recommendations from the National COVID-19 Clinical Evidence Taskforce</article-title>
          <source>Aust N Z J Obstet Gynaecol</source>
          <year>2020</year>
          <month>12</month>
          <volume>60</volume>
          <issue>6</issue>
          <fpage>840</fpage>
          <lpage>851</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/33119139"/>
          </comment>
          <pub-id pub-id-type="doi">10.1111/ajo.13270</pub-id>
          <pub-id pub-id-type="medline">33119139</pub-id>
          <pub-id pub-id-type="pmcid">PMC7820999</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref60">
        <label>60</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Haas</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Alvarez</surname>
              <given-names>DV</given-names>
            </name>
            <name name-style="western">
              <surname>Borissov</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Ferdowsi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>von Meyenn</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Trelle</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Teodoro</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Amini</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Utilizing Artificial Intelligence to Manage COVID-19 Scientific Evidence Torrent with Risklick AI: A Critical Tool for Pharmacology and Therapy Development</article-title>
          <source>Pharmacology</source>
          <year>2021</year>
          <volume>106</volume>
          <issue>5-6</issue>
          <fpage>244</fpage>
          <lpage>253</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.karger.com/?DOI=10.1159/000515908"/>
          </comment>
          <pub-id pub-id-type="doi">10.1159/000515908</pub-id>
          <pub-id pub-id-type="medline">33910199</pub-id>
          <pub-id pub-id-type="pii">000515908</pub-id>
          <pub-id pub-id-type="pmcid">PMC8247831</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref61">
        <label>61</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Teodoro</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Mottin</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Gobeill</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Gaudinat</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Vachon</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Ruch</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Improving average ranking precision in user searches for biomedical research datasets</article-title>
          <source>Database (Oxford)</source>
          <year>2017</year>
          <month>01</month>
          <day>01</day>
          <volume>2017</volume>
          <fpage>bax083</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://academic.oup.com/database/article-lookup/doi/10.1093/database/bax083"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/database/bax083</pub-id>
          <pub-id pub-id-type="medline">29220475</pub-id>
          <pub-id pub-id-type="pii">4600047</pub-id>
          <pub-id pub-id-type="pmcid">PMC5714153</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref62">
        <label>62</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rothkopf</surname>
              <given-names>DJ</given-names>
            </name>
          </person-group>
          <article-title>When the buzz bites back</article-title>
          <source>Washington Post</source>
          <year>2003</year>
          <month>05</month>
          <day>11</day>
          <access-date>2021-08-26</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www1.udel.edu/globalagenda/2004/student/readings/infodemic.html">http://www1.udel.edu/globalagenda/2004/student/readings/infodemic.html</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref63">
        <label>63</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Eysenbach</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Infodemiology and infoveillance: framework for an emerging set of public health informatics methods to analyze search, communication and publication behavior on the Internet</article-title>
          <source>J Med Internet Res</source>
          <year>2009</year>
          <month>03</month>
          <day>27</day>
          <volume>11</volume>
          <issue>1</issue>
          <fpage>e11</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2009/1/e11/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/jmir.1157</pub-id>
          <pub-id pub-id-type="medline">19329408</pub-id>
          <pub-id pub-id-type="pii">v11i1e11</pub-id>
          <pub-id pub-id-type="pmcid">PMC2762766</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref64">
        <label>64</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ogilvie</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Callan</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Combining document representations for known-item search</article-title>
          <source>SIGIR '03: Proceedings of the 26th annual international ACM SIGIR conference on Research and development in informaion retrieval</source>
          <year>2003</year>
          <fpage>143</fpage>
          <lpage>150</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dl.acm.org/doi/10.1145/860435.860463"/>
          </comment>
          <pub-id pub-id-type="doi">10.1145/860435.860463</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref65">
        <label>65</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Li</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Croft</surname>
              <given-names>WB</given-names>
            </name>
          </person-group>
          <article-title>Time-based language models</article-title>
          <source>CIKM '03: Proceedings of the twelfth international conference on Information and knowledge management</source>
          <year>2003</year>
          <fpage>469</fpage>
          <lpage>475</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dl.acm.org/doi/10.1145/956863.956951"/>
          </comment>
          <pub-id pub-id-type="doi">10.1145/956863.956951</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref66">
        <label>66</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Moulahi</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Tamine</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Yahia</surname>
              <given-names>SB</given-names>
            </name>
          </person-group>
          <article-title>When time meets information retrieval: Past proposals, current plans and future trends</article-title>
          <source>Journal of Information Science</source>
          <year>2016</year>
          <month>07</month>
          <day>11</day>
          <volume>42</volume>
          <issue>6</issue>
          <fpage>725</fpage>
          <lpage>747</lpage>
          <pub-id pub-id-type="doi">10.1177/0165551515607277</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref67">
        <label>67</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dong</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Kolari</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Bai</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Diaz</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Zheng</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Zha</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Time is of the essence: improving recency ranking using Twitter data</article-title>
          <source>WWW '10: Proceedings of the 19th international conference on World wide web</source>
          <year>2010</year>
          <fpage>331</fpage>
          <lpage>340</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dl.acm.org/doi/10.1145/1772690.1772725"/>
          </comment>
          <pub-id pub-id-type="doi">10.1145/1772690.1772725</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref68">
        <label>68</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Amati</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Amodeo</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Gaibisso</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Survival analysis for freshness in microblogging search</article-title>
          <source>CIKM '12: Proceedings of the 21st ACM international conference on Information and knowledge management</source>
          <year>2012</year>
          <fpage>2483</fpage>
          <lpage>2486</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dl.acm.org/doi/10.1145/2396761.2398672"/>
          </comment>
          <pub-id pub-id-type="doi">10.1145/2396761.2398672</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
