<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMIR</journal-id>
      <journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id>
      <journal-title>Journal of Medical Internet Research</journal-title>
      <issn pub-type="epub">1438-8871</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
    <article-id pub-id-type="publisher-id">v20i6e10281</article-id>
    <article-id pub-id-type="pmid">29941415</article-id>
    <article-id pub-id-type="doi">10.2196/10281</article-id>
    <article-categories>
      <subj-group subj-group-type="heading">
        <subject>Original Paper</subject>
      </subj-group>
      <subj-group subj-group-type="article-type">
        <subject>Original Paper</subject>
      </subj-group>
    </article-categories>
    <title-group>
      <article-title>A Deep Learning Method to Automatically Identify Reports of Scientifically Rigorous Clinical Research from the Biomedical Literature: Comparative Analytic Study</article-title>
    </title-group>
    <contrib-group>
      <contrib contrib-type="editor">
        <name>
          <surname>Eysenbach</surname>
          <given-names>Gunther</given-names>
        </name>
      </contrib>
    </contrib-group>
    <contrib-group>
      <contrib contrib-type="reviewer">
        <name>
          <surname>Banf</surname>
          <given-names>Michael</given-names>
        </name>
      </contrib>
      <contrib contrib-type="reviewer">
        <name>
          <surname>Wang</surname>
          <given-names>Yanshan</given-names>
        </name>
      </contrib>
    </contrib-group>
    <contrib-group>
      <contrib contrib-type="author" id="contrib1" corresp="yes" equal-contrib="yes">
      <name name-style="western">
        <surname>Del Fiol</surname>
        <given-names>Guilherme</given-names>
      </name>
      <degrees>MD, PhD</degrees>
      <xref rid="aff1" ref-type="aff">1</xref>
      <address>
        <institution>University of Utah</institution>
        <institution>Department of Biomedical Informatics</institution>
        <addr-line>421 Wakara Way</addr-line>
        <addr-line>Suite 140</addr-line>
        <addr-line>Salt Lake City, UT, 84108</addr-line>
        <country>United States</country>
        <phone>1 8015814080</phone>
        <email>guilherme.delfiol@utah.edu</email>
      </address>  
      <ext-link ext-link-type="orcid">http://orcid.org/0000-0001-9954-6799</ext-link></contrib>
      <contrib contrib-type="author" id="contrib2" equal-contrib="yes">
        <name name-style="western">
          <surname>Michelson</surname>
          <given-names>Matthew</given-names>
        </name>
        <degrees>PhD</degrees>
        <xref rid="aff2" ref-type="aff">2</xref>
        <xref rid="aff3" ref-type="aff">3</xref>
        <ext-link ext-link-type="orcid">http://orcid.org/0000-0003-3346-2132</ext-link>
      </contrib>
      <contrib contrib-type="author" id="contrib3">
        <name name-style="western">
          <surname>Iorio</surname>
          <given-names>Alfonso</given-names>
        </name>
        <degrees>MD, PhD</degrees>
        <xref rid="aff4" ref-type="aff">4</xref>
        <xref rid="aff5" ref-type="aff">5</xref>
        <ext-link ext-link-type="orcid">http://orcid.org/0000-0002-3331-8766</ext-link>
      </contrib>
      <contrib contrib-type="author" id="contrib4">
        <name name-style="western">
          <surname>Cotoi</surname>
          <given-names>Chris</given-names>
        </name>
        <degrees>BEng, EMBA</degrees>
        <xref rid="aff6" ref-type="aff">6</xref>
        <ext-link ext-link-type="orcid">http://orcid.org/0000-0001-7029-0582</ext-link>
      </contrib>
      <contrib contrib-type="author" id="contrib5">
        <name name-style="western">
          <surname>Haynes</surname>
          <given-names>R Brian</given-names>
        </name>
        <degrees>MD, PhD</degrees>
        <xref rid="aff4" ref-type="aff">4</xref>
        <xref rid="aff5" ref-type="aff">5</xref>
        <ext-link ext-link-type="orcid">http://orcid.org/0000-0002-1453-3196</ext-link>
      </contrib>
    </contrib-group>
    <aff id="aff1">
    <sup>1</sup>
    <institution>University of Utah</institution>
    <institution>Department of Biomedical Informatics</institution>  
    <addr-line>Salt Lake City, UT</addr-line>
    <country>United States</country></aff>
    <aff id="aff2">
      <sup>2</sup>
      <institution>Evid Science</institution>
      <addr-line>Los Angeles, CA</addr-line>
      <country>United States</country>
    </aff>
    <aff id="aff3">
      <sup>3</sup>
      <institution>InferLink Corporation</institution>
      <addr-line>Los Angeles, CA</addr-line>
      <country>United States</country>
    </aff>
    <aff id="aff4">
    <sup>4</sup>
    <institution>Department of Health Research Methods, Evidence, and Impact</institution>
    <institution>McMaster University</institution>  
    <addr-line>Hamilton, ON</addr-line>
    <country>Canada</country></aff>
    <aff id="aff5">
    <sup>5</sup>
    <institution>Department of Medicine</institution>
    <institution>Faculty of Health Sciences</institution>  
    <institution>McMaster University</institution>  
    <addr-line>Hamilton, ON</addr-line>
    <country>Canada</country></aff>
    <aff id="aff6">
    <sup>6</sup>
    <institution>Health Information Research Unit</institution>
    <institution>McMaster University</institution>  
    <addr-line>Hamilton, ON</addr-line>
    <country>Canada</country></aff>
    <author-notes>
      <corresp>Corresponding Author: Guilherme Del Fiol 
      <email>guilherme.delfiol@utah.edu</email></corresp>
    </author-notes>
    <pub-date pub-type="collection"><month>06</month><year>2018</year></pub-date>
    <pub-date pub-type="epub">
      <day>25</day>
      <month>06</month>
      <year>2018</year>
    </pub-date>
    <volume>20</volume>
    <issue>6</issue>
    <elocation-id>e10281</elocation-id>
    <!--history from ojs - api-xml-->
    <history>
      <date date-type="received">
        <day>5</day>
        <month>3</month>
        <year>2018</year>
      </date>
      <date date-type="rev-request">
        <day>12</day>
        <month>4</month>
        <year>2018</year>
      </date>
      <date date-type="rev-recd">
        <day>26</day>
        <month>4</month>
        <year>2018</year>
      </date>
      <date date-type="accepted">
        <day>12</day>
        <month>5</month>
        <year>2018</year>
      </date>
    </history>
    <!--(c) the authors - correct author names and publication date here if necessary. Date in form ', dd.mm.yyyy' after jmir.org-->
    <copyright-statement>©Guilherme Del Fiol, Matthew Michelson, Alfonso Iorio, Chris Cotoi, R Brian Haynes. Originally published in the Journal of Medical Internet Research (http://www.jmir.org), 25.06.2018.</copyright-statement>
    <copyright-year>2018</copyright-year>
    <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
      <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research, is properly cited. The complete bibliographic information, a link to the original publication on http://www.jmir.org/, as well as this copyright and license information must be included.</p>
    </license>  
    <self-uri xlink:href="http://www.jmir.org/2018/6/e10281/" xlink:type="simple"/>
    <abstract>
      <sec sec-type="background">
        <title>Background</title>
        <p>A major barrier to the practice of evidence-based medicine is efficiently finding scientifically sound studies on a given clinical topic.</p>
      </sec>
      <sec sec-type="objective">
        <title>Objective</title>
        <p>To investigate a deep learning approach to retrieve scientifically sound treatment studies from the biomedical literature.</p>
      </sec>
      <sec sec-type="methods">
        <title>Methods</title>
        <p>We trained a Convolutional Neural Network using a noisy dataset of 403,216 PubMed citations with title and abstract as features. The deep learning model was compared with state-of-the-art search filters, such as PubMed’s Clinical Query Broad treatment filter, McMaster’s textword search strategy (no Medical Subject Heading, MeSH, terms), and Clinical Query Balanced treatment filter. A previously annotated dataset (Clinical Hedges) was used as the gold standard.</p>
      </sec>
      <sec sec-type="results">
        <title>Results</title>
        <p>The deep learning model obtained significantly lower recall than the Clinical Queries Broad treatment filter (96.9% vs 98.4%; <italic>P</italic>&#60;.001); and equivalent recall to McMaster’s textword search (96.9% vs 97.1%; <italic>P</italic>=.57) and Clinical Queries Balanced filter (96.9% vs 97.0%; <italic>P</italic>=.63). Deep learning obtained significantly higher precision than the Clinical Queries Broad filter (34.6% vs 22.4%; <italic>P</italic>&#60;.001) and McMaster’s textword search (34.6% vs 11.8%; <italic>P</italic>&#60;.001), but was significantly lower than the Clinical Queries Balanced filter (34.6% vs 40.9%; <italic>P</italic>&#60;.001).</p>
      </sec>
      <sec sec-type="conclusions">
        <title>Conclusions</title>
        <p>Deep learning performed well compared to state-of-the-art search filters, especially when citations were not indexed. Unlike previous machine learning approaches, the proposed deep learning model does not require feature engineering, or time-sensitive or proprietary features, such as MeSH terms and bibliometrics. Deep learning is a promising approach to identifying reports of scientifically rigorous clinical research. Further work is needed to optimize the deep learning model and to assess generalizability to other areas, such as diagnosis, etiology, and prognosis.</p>
      </sec>
    </abstract>
    <kwd-group>
      <kwd>information retrieval</kwd>
      <kwd>evidence-based medicine</kwd>
      <kwd>deep learning</kwd>
      <kwd>machine learning</kwd>
      <kwd>literature databases</kwd>
    </kwd-group></article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <sec>
        <title>Background and Significance</title>
        <p>With roughly 95 clinical trials published per day, the biomedical literature is increasing at a very rapid pace, imposing a significant challenge to the practice of evidence-based medicine. However, only 1% of studies in the biomedical literature meet minimum criteria for scientific quality [<xref ref-type="bibr" rid="ref1">1</xref>] and most published research findings are eventually shown to be false [<xref ref-type="bibr" rid="ref2">2</xref>]. As a result, a major barrier to the practice of evidence-based medicine is efficiently finding the relatively small number of scientifically sound studies on a given clinical topic. Systematic reviews and meta-analyses attempt to summarize the available evidence on a given clinical question aiming for near perfect recall. However, systematic reviews are often not available and become quickly outdated. Therefore, clinicians may benefit from access to the latest evidence from high-quality clinical trials before they are included in systematic reviews.</p>
        <p>For over two decades, the <italic>Clinical Query</italic> filters have been the state-of-the-art approach to retrieve scientifically sound clinical studies from the primary literature, both for the development of systematic reviews and point-of-care decision support [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref4">4</xref>]. The Clinical Query filters consist of Boolean search strategies based on textwords and Medical Subject Headings (MeSH) terms that have been developed and validated through a systematic approach [<xref ref-type="bibr" rid="ref5">5</xref>]. The search textwords and MeSH terms used in the Clinical Query filters reflect widely accepted criteria for scientifically sound clinical studies, such as “clinical trial,” “random allocation,” and “randomized controlled trial [Publication Type].” Although initially developed in the 1990s, the Clinical Query filters have been updated over time and the recall and precision of the filters developed in 2000 did not significantly change a decade later [<xref ref-type="bibr" rid="ref6">6</xref>]. Clinical Query filters for several topics are available in PubMed and several other bibliographic biomedical databases, with focuses on areas such as therapy, diagnosis, etiology, and prognosis, and these are tuned for precision or recall. A limitation of the Clinical Query filters is their dependency on MeSH terms, which are added to PubMed citations 23 to 177 days after an article is published (according to a previous study [<xref ref-type="bibr" rid="ref7">7</xref>]) and 17 to 328 days according to our more recent analysis. In addition, there is room for improvement, especially in terms of retrieval precision.</p>
        <p>Previous studies investigated the use of machine learning approaches to automate the retrieval of scientifically sound studies [<xref ref-type="bibr" rid="ref8">8</xref>-<xref ref-type="bibr" rid="ref10">10</xref>]. Features used in those studies included bibliometrics (eg, citation count, impact factor), words in the article title and abstract, MeSH terms, Unified Medical Language System (UMLS) concepts, and semantic predications. Although the results of machine learning studies were promising, they had important limitations that precluded wide adoption in practice, such as a requirement for significant feature engineering (eg, UMLS concepts, bibliometrics), reliance on proprietary and time-dependent features (eg, MeSH index, citation counts), and potential overfitting to a particular dataset.</p>
        <p>In the present study, we investigated a deep learning approach for the retrieval of scientifically sound treatment studies from PubMed. To overcome limitations of previous methods, we focused on an approach that requires very little feature engineering and does not rely on proprietary or time-dependent features. We then compared the performance of a deep learning model with state-of-the-art PubMed search strategies against Clinical Hedges, a rigorous gold standard of over 50,000 studies that were systematically rated for scientific quality according to rigorous criteria [<xref ref-type="bibr" rid="ref5">5</xref>].</p>
      </sec>
      <sec>
        <title>Deep Machine Learning</title>
        <p>Recent advances in machine learning have led to dramatic improvements in the abilities of computers to mimic human activities. Many of these improvements leverage “deep learning,” and embody neural-networks with many nodes that are fully connected across layers of the network. In the context of supervised deep learning, which we utilized here, such a network is trained by providing many examples of the objective to classify, as well as many counter examples.</p>
      </sec>
      <sec>
        <title>Deep Neural Networks</title>
        <p>A Deep Neural Network (DNN) is a fully connected set of “layers,” each of which contains a node that encodes information in the form of a weight associated with a particular feature of the input data. By “connected” we mean that the nodes of each layer connect with the nodes of the next. A DNN is considered “deep” because it can contain many such connected nodes and/or layers, thereby encoding a significant amount of information in the weights applied to the input of each layer.</p>
        <p>In the case of text categorization, the input to the network is a set of words (or “word embeddings” described below). Each successive layer of the DNN applies some transformation to the words in the form of linear algebraic operations that progressively encodes more granular features of the data [<xref ref-type="bibr" rid="ref11">11</xref>,<xref ref-type="bibr" rid="ref12">12</xref>]. A supervised DNN, such as our approach, requires that each input (eg, set of words) is associated with a class such that the DNN will learn how to associate the words with each class in order to predict the class for newly unseen sets of words. As with most machine learning approaches, the input text can be transformed in a number of ways. In the case of text classification, such transformations could include adding extraneous information such as bibliographic and author information. This process of designing and applying features to optimize classification is known as <italic>feature engineering</italic>.</p>
        <p>Although potentially useful, feature engineering is challenging: it may require significant manual effort and introduces the risk that certain features will be too specific to the training data or may even be unavailable. As we discuss below, leveraging the MeSH terms used to index articles in PubMed can certainly help in a task such as ours, but there is no guarantee that such information is available for an article in a timely manner.</p>
        <p>Therefore, we opted for an “end-to-end” machine solution. In end-to-end approaches, the DNN is trained solely on the inputs and classes with minimal or no feature engineering. Minimal features are those that are task- and domain-agnostic, such as converting words to lower case, removing stopwords, and stemming. Potential advantages of such an approach include: (1) simpler design, therefore strong results are more likely indicative that the DNN is detecting textual signal, rather than an arcane feature; (2) no reliance on external factors, such as features that may not be available in a timely manner; and (3) mitigation of concept drift, which can occur when the training features misalign from those available when a model is deployed. Therefore, end-to-end systems provide a strong justification for a first approach in classification tasks.</p>
      </sec>
      <sec>
        <title>Recurrent Neural Networks and Convolutional Neural Networks</title>
        <p>In this study, we utilized a particular deep-learning neural-network known as Convolutional Neural Network (CNN), following the approach of Kim [<xref ref-type="bibr" rid="ref13">13</xref>]. To some extent, Kim’s CNN architecture has become a <italic>de facto</italic> standard for text classification. CNNs analyze text using sliding word windows of specified sizes. Each sliding word window generates a set of real-valued vectors. Generally, each word or even character is associated with a “word-embedding,” which is a low-dimension real-valued vector that represents the semantic space for the word [<xref ref-type="bibr" rid="ref14">14</xref>]. Therefore, as each term is associated with a vector, each sliding word window then represents a matrix. Each sliding word window is then passed through an activation function, and a “max pooling” is applied such that only the maximum value is kept from the set of values produced by the activation function, as applied to the window. That is, each window is associated with its single, maximal value outputted by the activation function. These maximal values are concatenated together to form their own vector representing the set of windows. This set of concatenated values forms the next layer, which is then passed to the final layer, which includes the decision-making activation function (such as Softmax, as described below).</p>
        <p>An example of a CNN is shown in <xref ref-type="fig" rid="figure1">Figure 1</xref>. From left to right, we see one set of input words and their word embeddings, which forms the initial input matrix. This network uses two sets of sliding windows, one of size two and one of size three. These sets of sliding windows produce the convolutional layer, transforming the sliding window’s features into new feature values, which are then pooled such that only the maximum value is kept (the “max pooling”). Finally, the max-pooled values are passed through the fully connected final (output) layer, which uses Softmax to assign a probability of class membership (shown as “yes” or “no” for binary class membership). While this approach may appear “shallow,” it has been shown to be effective, becoming one of the most popular architectural choices for CNN [<xref ref-type="bibr" rid="ref13">13</xref>].</p>
        <p>Another popular approach for text-analysis tasks is Recurrent Neural Networks (RNNs). In contrast to CNN’s sliding-windows, which treat phrases somewhat independently, RNNs are well suited for language tasks where the classification of a particular piece of text depends on the surrounding text. For instance, RNNs are well suited for part-of-speech tagging or machine translation, which have a strong dependency on the particular word order. However, because they must consider order dependencies, they are not as appropriate for tasks such as ours. In fact, in a head-to-head comparison between CNNs and RNNs for natural language processing tasks, Yin et al [<xref ref-type="bibr" rid="ref15">15</xref>] found that CNNs are particularly well suited for so-called “keyphrase recognition” tasks such as text classifications or sentiment analyses. Furthermore, CNNs were found to be up to five times faster than RNNs [<xref ref-type="bibr" rid="ref15">15</xref>], which is important in real-world tasks such as ours where the goal is to classify an extremely large corpus, such as PubMed, in a reasonable amount of time.</p>
      </sec>
      <sec>
        <title>Deep Neural Network Optimization (How It Learns)</title>
        <p>The main learning for a neural network involves “forward propagation” and “backward propagation.” In forward propagation, inputs are translated into features by transforming the inputs into real-valued vectors of fixed sizes. These vectors (eg, “layers”) are combined with weights and passed through an activation function that summarizes the contribution of each feature of the vector and its weight. Layers are connected to one another such that the values from the activation function of the current layer become the inputs to the next layer. Therefore, the “forward propagation” starts with input and passes activation values from layer to layer until the final layer, which outputs some decision vector. In our case, this final output function is a sigmoid activation function, which can assign probability to class membership. In “backward propagation” the final classification decision is compared with the known result from the training data and errors are propagated backward through the network, from the output layer to the input layer. Each weight is updated according to its contribution to the decision accuracy via gradient descent.</p>
        <p>In the context of CNN, one can interpret the various passes through “forward” propagation as applying weights to different “chunks” of the text input, and “backwards” propagation as adjusting those weights to make the fewest errors in predicting the class of the input text. Within the context of DNN, since optimization is essentially a weight adjustment process, the higher the number of nodes and layers, the more weights must be adjusted to find the optimal classifier, which requires more training data. Conversely, more weights and layers may improve classification. Therefore, part of DNN design is to identify optimal parameter choices and how to deal with overfitting. In our case, we used a technique called dropout regularization, which randomly prevents nodes from participating in a classification decision for a given training input, so the model does not overfit by learning to simply rely on a particular node.</p>
        <p>Other optimizations include which mathematical operations to choose for the propagation; this is called the “activation function” (ie, how a node produces a score given the weight and input). Different choices can result in different DNN behavior; some activation functions are more robust than others, while some can make the training process exceedingly long. We chose the Rectified Linear Unit (ReLU) for our activation function, as it provides an efficient mechanism to build robust and accurate CNNs. The choice of ReLU is quite common in tasks such as ours. Finally, within the context of CNN, it is common to provide a down-sampling between layers, which helps control overfitting and makes training more efficient. The most common approach is max pooling, which we use in our approach.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Example of a Convolutional Neural Network.</p>
          </caption>
          <graphic xlink:href="jmir_v20i6e10281_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>Therefore, the training of a network involves multiple passes of forward propagation followed by backward propagation. It is common to call each iteration over all the training data an “epoch.” The model generally stops this training process at a fixed number of epochs or when the metric of success appears to have reached some maximal value.</p>
      </sec>
      <sec>
        <title>Clinical Hedges</title>
        <p>Clinical Hedges is a database previously developed by the Hedges Group at McMaster University, used to develop and evaluate the Clinical Query filters [<xref ref-type="bibr" rid="ref5">5</xref>] and previous machine learning approaches [<xref ref-type="bibr" rid="ref8">8</xref>] that retrieve scientifically sound clinical studies from PubMed. The database has 50,594 articles published in 170 clinical journals. All articles were manually annotated by highly-calibrated information science experts according to type (eg, etiology, prognosis, diagnosis, prevention, therapy, clinical prediction) and whether or not each study met prespecified and experimentally validated methodological criteria for scientifically sound clinical research. The criteria and process used to rate the articles in Clinical Hedges are described elsewhere [<xref ref-type="bibr" rid="ref5">5</xref>]. In summary, criteria for scientifically sound studies on treatment interventions include random allocation of study participants, clinically relevant outcomes, and at least 80% follow-up of study participants.</p>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Overview of the Approach</title>
        <p>Overall, our approach consisted of (1) training and testing deep learning models with a large and noisy dataset obtained automatically through PubMed searches based on the Clinical Query treatment filter, and (2) evaluating the performance of the resulting model against Clinical Hedges as a gold standard.</p>
        <p>Specifically, the study method consisted of the following steps, which are described in more detail in the sections below: (1) preparation of a dataset for training the deep learning models, (2) training and tuning deep learning models, (3) comparison of the deep learning approach with state-of-the-art search filters and McMaster’s textword filter in terms of precision and recall, and (4) analysis of deep learning performance in terms of precision at several levels of K retrieved citations.</p>
      </sec>
      <sec>
        <title>Preparation of Training Dataset</title>
        <p>The training/testing dataset consisted of 403,216 positive and negative citations retrieved from PubMed. To retrieve <italic>positive studies</italic> (ie, scientifically sound), we used the Clinical Queries treatment filter tuned for precision (“narrow” filter; <xref ref-type="fig" rid="figure2">Figure 2</xref>). In previous studies, this filter yielded 93% recall and 54% precision for scientifically sound treatment studies in the Clinical Hedges gold standard [<xref ref-type="bibr" rid="ref5">5</xref>]. Therefore, this search strategy was used as a surrogate for retrieving a large dataset of scientifically sound studies that are similar to the ones in the Clinical Hedges gold standard. Although this approach produced a rather noisy training set (close to half of the positive samples were false-positives), the CNN approach is resilient enough to handle noisy data as long as there is sufficient training data. To retrieve <italic>negative studies</italic> (ie, not scientifically sound), we retrieved studies conducted in humans that were not retrieved by the “positive” search strategy above.</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Datasets used for training and testing the deep learning models. The PubMed Clinical Query “Narrow” treatment filter was used as a surrogate to identify positive (scientifically sound) studies. The resulting dataset was split into training and development sets using a 90/10 ratio.</p>
          </caption>
          <graphic xlink:href="jmir_v20i6e10281_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>The strategies were limited to retrieve a maximum of 150,000 and 300,000 citations, respectively, to yield a dataset with one third positive and two thirds negative citations. Both strategies were limited to citations published between 2007 and 2017. Citations without an abstract were removed. The search strategies were executed with PubMed’s eUtils application program interface. The resulting dataset contained 147,182 positive and 256,034 negative citations (<xref ref-type="fig" rid="figure2">Figure 2</xref>).</p>
      </sec>
      <sec>
        <title>Training and Tuning Deep Learning Models</title>
        <p>Deep learning models were trained using 90% of the citations in the dataset, with the remaining 10% used as a “development” set (<xref ref-type="fig" rid="figure2">Figure 2</xref>). As the training/development split was randomly generated, the development set maintained a similar proportion of positive to negative instances as the training set. To build model inputs, we concatenated the title with the abstract, removed stopwords, and kept the first 650 tokens of the remaining words.</p>
        <p>As mentioned in the <italic>Deep Machine Learning</italic> section, our model follows the well-accepted approach of applying CNNs for text classification. The first layer applies character embedding to the words, so that words outside of the known vocabulary can be included for prediction. The character embeddings are then combined with word embeddings (built from the training data), to capture semantic similarity. This input is passed into our model, which contains two convolutional layers: one for sliding word windows of size two and one for word windows of size three. Each convolutional layer contains 512 filters associated with it. We apply a ReLU unit to the convolutional layers and pass them through a max pooling procedure. The resulting max-pooled features are then concatenated into a single layer. The max-pooled layer is passed to the next layer which consists of 512 units (fully connected), to which we apply a Softmax activation function to predict the probability of a citation belonging to either class. We then take the Argmax of the Softmax predictions as the predicted class. We ran this model with dropout regularization of 0.5 (to prevent overfitting) for 30 epochs. Hyper-parameters were chosen experimentally based on maximized precision on the training data.</p>
      </sec>
      <sec>
        <title>Comparison of the Deep Learning Approach With State-of-the-Art PubMed Search Strategies</title>
        <p>We tested three hypotheses that reflect the requirements imposed by different information retrieval scenarios. The first scenario consisted of search strategies to support the development of evidence-based syntheses, such as systematic reviews and clinical guidelines [<xref ref-type="bibr" rid="ref16">16</xref>]. In this scenario, there is a requirement for near perfect recall. The hypothesis for this scenario was that the deep learning approach would yield equivalent recall with higher precision for scientifically sound treatment studies compared with the PubMed Clinical Queries Broad filter, which has almost perfect recall (<xref ref-type="fig" rid="figure3">Figure 3</xref>).</p>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>Search strategies used to retrieve scientifically sound treatment studies in comparison with the deep learning model.</p>
          </caption>
          <graphic xlink:href="jmir_v20i6e10281_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>The second scenario reflects the need to retrieve recent studies, such as in literature surveillance efforts to identify new evidence to update existing systematic reviews and clinical guidelines [<xref ref-type="bibr" rid="ref17">17</xref>-<xref ref-type="bibr" rid="ref19">19</xref>]. Since Clinical Query filters depend partially on MeSH terms and publication type, they are less effective for literature surveillance. Instead, search strategies based on terms in the citation title and abstract are preferred. The hypothesis for this scenario was that the deep learning approach would yield equivalent recall but higher precision for scientifically sound treatment studies compared with a textword search strategy provided by the Clinical Hedges group from McMaster University (<xref ref-type="fig" rid="figure3">Figure 3</xref>).</p>
        <p>The third scenario represents clinicians searching the literature for evidence to meet clinicians’ information needs that are raised in the care of a specific patient [<xref ref-type="bibr" rid="ref20">20</xref>]. In this scenario, trading a small loss in recall for substantial gains in precision is acceptable. We hypothesized that the deep learning approach would yield equivalent recall but higher precision for scientifically sound treatment studies compared with McMaster’s Balanced Clinical Query filter, which uses a combination of textwords, MeSH terms, and publication types (<xref ref-type="fig" rid="figure3">Figure 3</xref>).</p>
        <p>The Clinical Hedges gold standard was used to test the three hypotheses. For positive citations, we retrieved 1524 original scientifically sound studies, with a focus on treatment, from the Clinical Hedges database. For negative citations, we retrieved 29,144 treatment studies from Clinical Hedges that were not in the positive set. For statistical analyses, we split the resulting dataset into 20 random subsamples, which were stratified to ensure a balanced ratio of positive and negative citations in each subsample. Measures of precision, recall, and F-measure were obtained for the four approaches on each of the 20 subsamples (<xref ref-type="fig" rid="figure4">Figure 4</xref>). Last, we ranked the output of the deep learning model according to its probability score and obtained measures of precision at several levels of top K citations (10, 20, 50, 100, 200, 300, and 500).</p>
      </sec>
      <sec>
        <title>Statistical Analysis</title>
        <p>Classification performance was measured according to the average precision and recall across 20 data samples. We used the paired Student t-test to test the significance of the differences in recall and precision between the two approaches in each experiment, with the significance level set at 0.05.</p>
        <fig id="figure4" position="float">
          <label>Figure 4</label>
          <caption>
            <p>Evaluation method, including comparisons between the deep learning approach and Boolean searches focused on three different information retrieval scenarios.</p>
          </caption>
          <graphic xlink:href="jmir_v20i6e10281_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <p>The results are organized according to the three information retrieval scenarios and study hypotheses.</p>
      <sec>
        <title>Scenario 1 - Development of Evidence-Based Syntheses</title>
        <p><xref ref-type="table" rid="table1">Table 1</xref> shows the results of the comparisons for Scenario 1, which requires near perfect recall. We tested the hypothesis that <italic>the deep learning approach yields equivalent recall with higher precision for scientifically sound treatment studies compared with the PubMed Clinical Queries Broad filter</italic>. The Clinical Queries Broad filter had statistically significantly higher recall than the deep learning model (98.4% vs 96.9%; <italic>P</italic>=.002), although the difference was small (-1.6%) and likely marginal in practice, depending on the use case. The deep learning model had significantly higher precision than the Clinical Queries Broad filter, with a +12.2% absolute difference (34.6% vs 22.4%; <italic>P</italic>&#60;.001).</p>
      </sec>
      <sec>
        <title>Scenario 2 - Literature Surveillance</title>
        <p><xref ref-type="table" rid="table2">Table 2</xref> shows the results of the comparisons for Scenario 2, which requires retrieval of recent studies prior to MeSH indexing. We tested the hypothesis that <italic>the deep learning approach yields equivalent recall but higher precision for scientifically sound treatment studies compared with a textword search strategy</italic>. The deep learning model was equivalent to McMaster’s textword search in terms of recall (96.9% vs 97.1%; <italic>P</italic>=.57); and had significantly higher precision than the textword search (34.6% vs 11.8%; <italic>P</italic>&#60;.001).</p>
      </sec>
      <sec>
        <title>Scenario 3 - Patient Care Decision Support</title>
        <p><xref ref-type="table" rid="table3">Table 3</xref> shows the results of the comparisons for Scenario 3, in which trading a small loss in recall for gains in precision is acceptable. We tested the hypothesis that <italic>the deep learning approach yields equivalent recall but higher precision for scientifically sound treatment studies compared with McMaster’s Balanced Clinical Query filter.</italic> Compared with the McMaster Balanced treatment filter, the deep learning model had similar recall (96.9% vs 97.0%; <italic>P</italic>=.63), but lower precision (34.6% vs 40.9%; <italic>P</italic>&#60;.001; <xref ref-type="table" rid="table3">Table 3</xref>).</p>
      </sec>
      <sec>
        <title>Precision at K</title>
        <p>The precision at K curve for the ranked output of the deep learning model showed that precision ranged from 75.5% to 61% among the top 10 to top 100 citations and only decreased substantially after the top 200, 300, and 500 citations (<xref ref-type="fig" rid="figure5">Figure 5</xref>).</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Average recall, precision, and F-measure of the deep learning model and Clinical Query Broad filter according to the Clinical Hedges gold standard (N=20).</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="270"/>
            <col width="250"/>
            <col width="320"/>
            <col width="160"/>
            <thead>
              <tr valign="bottom">
                <td>Parameter</td>
                <td>Deep learning (%)</td>
                <td>CQ<sup>a</sup> broad (%)</td>
                <td><italic>P</italic> value</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Recall</td>
                <td>96.9</td>
                <td>98.4</td>
                <td>.002</td>
              </tr>
              <tr valign="top">
                <td>Precision</td>
                <td>34.6</td>
                <td>22.4</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td>F-measure</td>
                <td>51.0</td>
                <td>36.5</td>
                <td>&#60;.001</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>CQ: PubMed Clinical Query Treatment filter</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Average recall, precision, and F-measure of the deep learning model and McMaster’s textword search according to the Clinical Hedges gold standard (N=20).</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="270"/>
            <col width="250"/>
            <col width="320"/>
            <col width="160"/>
            <thead>
              <tr valign="top">
                <td>Parameter</td>
                <td>Deep learning (%)</td>
                <td>Textword search (%)</td>
                <td><italic>P</italic> value</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Recall</td>
                <td>96.9</td>
                <td>97.1</td>
                <td>.57</td>
              </tr>
              <tr valign="top">
                <td>Precision</td>
                <td>34.6</td>
                <td>11.8</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td>F-measure</td>
                <td>51.0</td>
                <td>21.0</td>
                <td>&#60;.001</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Average recall, precision, and F-measure of the deep learning approach and McMaster’s Balanced Treatment filter according to the Clinical Hedges gold standard (N=20).</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="270"/>
            <col width="250"/>
            <col width="320"/>
            <col width="160"/>
            <thead>
              <tr valign="bottom">
                <td>Measure</td>
                <td>Deep learning (%)</td>
                <td>McMaster’s CQ<sup>a</sup> balanced filter (%)</td>
                <td><italic>P</italic> value</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Recall</td>
                <td>96.9</td>
                <td>97.0</td>
                <td>.63</td>
              </tr>
              <tr valign="top">
                <td>Precision</td>
                <td>34.6</td>
                <td>40.9</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td>F-measure</td>
                <td>51.0</td>
                <td>57.5</td>
                <td>&#60;.001</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table3fn1">
              <p><sup>a</sup>CQ: PubMed Clinical Query Treatment filter</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <fig id="figure5" position="float">
          <label>Figure 5</label>
          <caption>
            <p>Average precision of the deep learning model at different levels of top K citations.</p>
          </caption>
          <graphic xlink:href="jmir_v20i6e10281_fig5.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Significant Findings</title>
        <p>To our knowledge, this is the first study to investigate the use of deep learning techniques to identify reports of scientifically sound studies in the biomedical literature in three different information-seeking scenarios. The deep learning approach performed reasonably well compared with state-of-the-art search filters, especially for literature surveillance. For evidence synthesis, the deep learning approach had slightly lower recall (-1.6%), but significantly higher precision (+12.2%) than the PubMed Clinical Query Broad treatment filter. For literature surveillance, the deep learning approach had equivalent recall to McMaster’s textword filter, but significantly higher precision (+22.8%). For patient care decision-making, the deep learning model had similar recall, but lower precision (-6.3%) than McMaster’s Balanced filter. Strengths of the study methodology include the use of a very large training set, comparison with state-of-the-art search strategies, and evaluation with a rigorous gold standard which was completely independent from the training set.</p>
        <p>The proposed deep learning approach has three main potential benefits compared with previous approaches. First, unlike previous machine learning approaches, which depend on features that are not always openly and contemporaneously available (eg, MeSH terms, citation counts, journal impact factors), the proposed deep learning approach only uses citation title and abstract, which are available as soon as citations are entered in PubMed. Although full-text articles could be added as features in an attempt to improve performance, obtaining access to the full-text of all articles indexed in PubMed is impracticable since most journals do not provide open access to full-text. To assess the potential duration of delays for literature surveillance strategies based on MeSH filters, we determined the time between the date of creation of the article record in PubMed (CRDT) and the date of posting of MeSH terms (MHDA) for 107 journals (55,237 articles) in the McMaster PLUS database, from which the Clinical Hedges database was derived. The mean delay in MeSH indexing per journal was 162 days (95% CI 157-167), with a range of 17 to 328 days. Indexing intervals for journals were inversely correlated with journal impact factors (for 2016), but the correlation was relatively weak (-0.38; CI -0.199 to -0.517). As a second benefit, the deep learning model provides a ranked output with 70% or higher precision among the top 50 citations. This feature could be particularly useful for clinicians in busy clinical settings who are less likely to look beyond the top 20 citations that are displayed in PubMed searches [<xref ref-type="bibr" rid="ref20">20</xref>,<xref ref-type="bibr" rid="ref21">21</xref>]. In addition, citation ranking could help with systematic review development, since front-loading “eligible” citations can be used to help train and calibrate citation screeners and prioritize work [<xref ref-type="bibr" rid="ref22">22</xref>]. 
Third, the deep learning model obtained reasonable performance despite being trained on a noisy dataset (an estimated 50% of the positive cases were false-positives). This finding confirms the robustness of the deep learning approach, which is known to be resilient to noisy training data [<xref ref-type="bibr" rid="ref23">23</xref>].</p>
      </sec>
      <sec>
        <title>Comparison With Prior Work</title>
        <p>Previous work applied deep learning to classification tasks in the biomedical informatics domain. Lee [<xref ref-type="bibr" rid="ref24">24</xref>] classified sentences as belonging to papers that would be included in a systematic review, or those that would not. However, because they did not employ a large-scale training procedure, as we devised here, their results were poor. It is also not clear whether the author focused solely on sentence classification, or document classification, as in our work [<xref ref-type="bibr" rid="ref24">24</xref>]. Hughes et al [<xref ref-type="bibr" rid="ref25">25</xref>] applied CNNs to classify sentences according to one of 26 categories, such as “Brain” or “Cancer,” using a similar approach (though a different training procedure) to a different problem. Wang et al [<xref ref-type="bibr" rid="ref26">26</xref>] used word, dependency, and abstract meaning representation embeddings to extract information on drug-drug interactions from the biomedical literature. Both Nguyen et al [<xref ref-type="bibr" rid="ref27">27</xref>] and Che et al [<xref ref-type="bibr" rid="ref28">28</xref>] utilized CNNs to predict risk outcomes, such as hospital readmission, using electronic health record data as an input. As with Hughes et al [<xref ref-type="bibr" rid="ref25">25</xref>], although applied to different problems, the latter studies demonstrated precedent for using CNN in biomedical text classification.</p>
        <p>A polynomial Support Vector Machine classifier based on MeSH terms, publication type, and title/abstract words obtained a recall of 96% and precision of 18% against a gold standard of internal medicine articles included in the American College of Physicians Journal Club [<xref ref-type="bibr" rid="ref9">9</xref>]. A different study compared Clinical Query filters, machine learning, and algorithms based on citation count and the PageRank algorithm using a gold standard of important literature on common problems in surgical oncology [<xref ref-type="bibr" rid="ref10">10</xref>]. The PageRank algorithm obtained a precision at the top 10, 20, 50, and 100 citations of 7.8%, 13.0%, 19.9%, and 26.3%, respectively [<xref ref-type="bibr" rid="ref10">10</xref>]. Overall precision and recall were not reported. More recently, a study by Kilicoglu et al [<xref ref-type="bibr" rid="ref8">8</xref>] investigated a set of classifiers using features such as MeSH terms, title/abstract words, UMLS concepts, and semantic predications. A Naïve Bayes classifier with these features obtained a recall and precision of 91.4% and 52.5% for treatment studies in the Clinical Hedges database [<xref ref-type="bibr" rid="ref8">8</xref>]. As discussed above, those previous approaches relied on substantial feature engineering and/or proprietary and time-sensitive features, compromising the use of those approaches in real-time information retrieval systems. In a recent study investigating an approach similar to ours, Marshall et al [<xref ref-type="bibr" rid="ref29">29</xref>] developed CNN and support vector machine classifiers based on article title and abstract to identify reports of randomized controlled trials (RCTs). The best classifier obtained a recall of 98.5% and precision of 21% [<xref ref-type="bibr" rid="ref29">29</xref>]. 
Although the authors also evaluated their classifiers against the Clinical Hedges database, the results cannot be directly compared with our study because their goal was to identify RCTs versus scientifically sound studies (not all RCTs are scientifically sound and not all scientifically sound studies are RCTs). Another difference was that Marshall et al [<xref ref-type="bibr" rid="ref29">29</xref>] used a training set derived from RCTs identified in Cochrane systematic reviews while we used a dataset obtained using the Clinical Queries Treatment Narrow filter.</p>
      </sec>
      <sec>
        <title>Error Analysis</title>
        <p>We analyzed a random sample of 20 false-negatives and 20 false-positives identified by the deep learning model. The majority of the false-negatives (16/20) were likely due to the lack of an explicit description of the study design in the article abstract, which led the deep learning model to miss these articles. Of the 20 false-negatives, the Clinical Query Broad filter was able to correctly identify 14 articles based on MeSH terms and publication type rather than words in the abstract or title. Two approaches can be investigated in future studies to address this problem. First, MeSH terms and publication type could be included as deep learning features. The caveat is that this approach would require feature engineering and would be limited by the time lag of MeSH terms and publication type described above. The second, and perhaps more promising approach, is to include the methods section from the article full-text as an input for deep learning. Since the methods section has many more details on the study methodology than the article abstract, it may lead to more accurate classification of scientifically sound studies.</p>
        <p>False-positives were due to two main error categories. First, 7 of 20 cases were marginal articles that partially met quality criteria (eg, RCT without a clinical outcome) and therefore were more difficult to rate. Second, in 11 of 20 cases the abstract included terms related to high quality methodology but stated these outside the context of the study method (eg, abstract conclusion stating the need for future RCTs, editorial raising the need for RCTs on a specific topic). Approaches to mitigate both types of errors include using the full-text of the methods section as input for the deep learning model and developing separate subclassifiers to detect studies that meet partial quality criteria, and nonoriginal studies (eg, editorials, letters, reviews).</p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>Our study has four important limitations. First, although we focused on deep learning models and optimization strategies that were most likely to produce the best results, we have not exhausted all deep learning optimization possibilities. For instance, new work on RNNs may prove more accurate in document classification tasks [<xref ref-type="bibr" rid="ref30">30</xref>,<xref ref-type="bibr" rid="ref31">31</xref>]. We chose to focus our efforts on CNNs because they run more efficiently, given the large scale of our text data, but comparing these methods to understand the trade-offs between speed and accuracy would be a worthwhile investigation. We also did not exhaustively search the hyper-parameter space for our CNN. Many of our choices were empirical, as this is the first study, and further efforts might leverage more systematic approaches to hyper-parameter tuning [<xref ref-type="bibr" rid="ref32">32</xref>]. Second, our approach is meant to be “end-to-end” (ie, text simply enters our pipeline and is classified). This approach is preferable because it does not require significant feature engineering or time-dependent features such as MeSH terms. However, further studies can explore adding richer features into our model to improve performance. For example, since McMaster’s textword filter has equivalent recall to (but lower precision than) the Clinical Query filters, it is possible that MeSH-based features could improve the precision of our deep learning approach. Third, we have made comparisons with only one textword filter and no other machine learning approaches, since we did not have access to those machine learning classifiers. Comparisons with two of the three previous machine learning approaches are indirect, since those studies did not use Clinical Hedges as a gold standard. 
Last, we focused on identifying “treatment” studies; further work is needed to verify whether our approach generalizes to other areas, such as diagnosis, etiology, and prognosis.</p>
      </sec>
      <sec>
        <title>Conclusion</title>
        <p>We compared deep learning with state-of-the-art search filters to identify reports of scientifically sound studies in the biomedical literature. Overall, the resulting deep learning model compared well with other approaches, especially in scenarios involving recent citations prior to MeSH indexing. Advantages of the deep learning approach include low feature engineering requirements, no dependency on proprietary and time-sensitive features, and the use of a very large training set. Future work is needed to investigate further optimization opportunities and to adapt the deep learning approach to other clinical areas. Deep learning is a promising approach to identifying scientifically sound studies from the biomedical literature and warrants further investigation as a potential alternative for, or supplement to, current search filters.</p>
      </sec>
    </sec>
  </body>
  <back>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">CNN</term>
          <def>
            <p>Convolutional Neural Network</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">DNN</term>
          <def>
            <p>Deep Neural Network</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">MeSH</term>
          <def>
            <p>Medical Subject Heading</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">RCT</term>
          <def>
            <p>randomized controlled trial</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">ReLU</term>
          <def>
            <p>Rectified Linear Unit</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">RNN</term>
          <def>
            <p>Recurrent Neural Network</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">UMLS</term>
          <def>
            <p>Unified Medical Language System</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>GDF was funded by National Library of Medicine grant 1R01LM011416 and National Cancer Institute grant 1U24CA204800. Furthermore, this material is based upon work supported by the Defense Advanced Research Projects Agency (DARPA) Program Office under Contract No. W31P4Q-17-C-0103. The authors wish to acknowledge InferLink Corporation for their advice and support. The Clinical Hedges database was created with grants from the US National Library of Medicine and the Canadian Institutes of Health Research.</p>
    </ack>
    <fn-group>
      <fn fn-type="conflict">
        <p>MM is the Chief Scientist of InferLink Corp. and CEO of Evid Science, Inc, both of which could benefit from using the above approach as a feature within existing or new medical literature analysis products. GDF, AI, CC, and RBH have no competing interests to declare.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Haynes</surname>
            <given-names>RB</given-names>
          </name>
        </person-group>
        <article-title>Where's the meat in clinical journals?</article-title>
        <source>ACP Journal club</source>  
        <year>1993</year>  
        <volume>119</volume>  
        <issue>3</issue>  
        <fpage>A22</fpage> </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Ioannidis</surname>
            <given-names>JPA</given-names>
          </name>
        </person-group>
        <article-title>Why most published research findings are false</article-title>
        <source>PLoS Med</source>  
        <year>2005</year>  
        <month>08</month>  
        <volume>2</volume>  
        <issue>8</issue>  
        <fpage>e124</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://dx.plos.org/10.1371/journal.pmed.0020124"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1371/journal.pmed.0020124</pub-id>
        <pub-id pub-id-type="medline">16060722</pub-id>
        <pub-id pub-id-type="pii">04-PLME-E-0321R2</pub-id>
        <pub-id pub-id-type="pmcid">PMC1182327</pub-id></nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Haynes</surname>
            <given-names>RB</given-names>
          </name>
          <name name-style="western">
            <surname>Wilczynski</surname>
            <given-names>N</given-names>
          </name>
          <name name-style="western">
            <surname>McKibbon</surname>
            <given-names>KA</given-names>
          </name>
          <name name-style="western">
            <surname>Walker</surname>
            <given-names>CJ</given-names>
          </name>
          <name name-style="western">
            <surname>Sinclair</surname>
            <given-names>JC</given-names>
          </name>
        </person-group>
        <article-title>Developing optimal search strategies for detecting clinically sound studies in MEDLINE</article-title>
        <source>J Am Med Inform Assoc</source>  
        <year>1994</year>  
        <volume>1</volume>  
        <issue>6</issue>  
        <fpage>447</fpage>  
        <lpage>58</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/7850570"/>
        </comment>  
        <pub-id pub-id-type="medline">7850570</pub-id>
        <pub-id pub-id-type="pmcid">PMC116228</pub-id></nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Wilczynski</surname>
            <given-names>NL</given-names>
          </name>
          <name name-style="western">
            <surname>McKibbon</surname>
            <given-names>KA</given-names>
          </name>
          <name name-style="western">
            <surname>Haynes</surname>
            <given-names>RB</given-names>
          </name>
        </person-group>
        <article-title>Enhancing retrieval of best evidence for health care from bibliographic databases: calibration of the hand search of the literature</article-title>
        <source>Stud Health Technol Inform</source>  
        <year>2001</year>  
        <volume>84</volume>  
        <issue>Pt 1</issue>  
        <fpage>390</fpage>  
        <lpage>3</lpage>  
        <pub-id pub-id-type="medline">11604770</pub-id></nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Wilczynski</surname>
            <given-names>NL</given-names>
          </name>
          <name name-style="western">
            <surname>Morgan</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Haynes</surname>
            <given-names>RB</given-names>
          </name>
          <collab>Hedges Team</collab>
        </person-group>
        <article-title>An overview of the design and methods for retrieving high-quality studies for clinical care</article-title>
        <source>BMC Med Inform Decis Mak</source>  
        <year>2005</year>  
        <month>06</month>  
        <day>21</day>  
        <volume>5</volume>  
        <fpage>20</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedinformdecismak.biomedcentral.com/articles/10.1186/1472-6947-5-20"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1186/1472-6947-5-20</pub-id>
        <pub-id pub-id-type="medline">15969765</pub-id>
        <pub-id pub-id-type="pii">1472-6947-5-20</pub-id>
        <pub-id pub-id-type="pmcid">PMC1183213</pub-id></nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Wilczynski</surname>
            <given-names>NL</given-names>
          </name>
          <name name-style="western">
            <surname>McKibbon</surname>
            <given-names>KA</given-names>
          </name>
          <name name-style="western">
            <surname>Walter</surname>
            <given-names>SD</given-names>
          </name>
          <name name-style="western">
            <surname>Garg</surname>
            <given-names>AX</given-names>
          </name>
          <name name-style="western">
            <surname>Haynes</surname>
            <given-names>RB</given-names>
          </name>
        </person-group>
        <article-title>MEDLINE clinical queries are robust when searching in recent publishing years</article-title>
        <source>J Am Med Inform Assoc</source>  
        <year>2013</year>  
        <volume>20</volume>  
        <issue>2</issue>  
        <fpage>363</fpage>  
        <lpage>8</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/23019242"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1136/amiajnl-2012-001075</pub-id>
        <pub-id pub-id-type="medline">23019242</pub-id>
        <pub-id pub-id-type="pii">amiajnl-2012-001075</pub-id>
        <pub-id pub-id-type="pmcid">PMC3638187</pub-id></nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Irwin</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Rackham</surname>
            <given-names>D</given-names>
          </name>
        </person-group>
        <article-title>Comparison of the time-to-indexing in PubMed between biomedical journals according to impact factor, discipline, and focus</article-title>
        <source>Res Social Adm Pharm</source>  
        <year>2017</year>  
        <volume>13</volume>  
        <issue>2</issue>  
        <fpage>389</fpage>  
        <lpage>393</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1016/j.sapharm.2016.04.006"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1016/j.sapharm.2016.04.006</pub-id>
        <pub-id pub-id-type="medline">27215603</pub-id>
        <pub-id pub-id-type="pii">S1551-7411(16)30019-5</pub-id></nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Kilicoglu</surname>
            <given-names>H</given-names>
          </name>
          <name name-style="western">
            <surname>Demner-Fushman</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Rindflesch</surname>
            <given-names>TC</given-names>
          </name>
          <name name-style="western">
            <surname>Wilczynski</surname>
            <given-names>NL</given-names>
          </name>
          <name name-style="western">
            <surname>Haynes</surname>
            <given-names>RB</given-names>
          </name>
        </person-group>
        <article-title>Towards automatic recognition of scientifically rigorous clinical research evidence</article-title>
        <source>J Am Med Inform Assoc</source>  
        <year>2009</year>  
        <volume>16</volume>  
        <issue>1</issue>  
        <fpage>25</fpage>  
        <lpage>31</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/18952929"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1197/jamia.M2996</pub-id>
        <pub-id pub-id-type="medline">18952929</pub-id>
        <pub-id pub-id-type="pii">M2996</pub-id>
        <pub-id pub-id-type="pmcid">PMC2605595</pub-id></nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Aphinyanaphongs</surname>
            <given-names>Y</given-names>
          </name>
          <name name-style="western">
            <surname>Tsamardinos</surname>
            <given-names>I</given-names>
          </name>
          <name name-style="western">
            <surname>Statnikov</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Hardin</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Aliferis</surname>
            <given-names>CF</given-names>
          </name>
        </person-group>
        <article-title>Text categorization models for high-quality article retrieval in internal medicine</article-title>
        <source>J Am Med Inform Assoc</source>  
        <year>2005</year>  
        <volume>12</volume>  
        <issue>2</issue>  
        <fpage>207</fpage>  
        <lpage>16</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://jamia.oxfordjournals.org/cgi/pmidlookup?view=long&#38;pmid=15561789"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1197/jamia.M1641</pub-id>
        <pub-id pub-id-type="medline">15561789</pub-id>
        <pub-id pub-id-type="pii">M1641</pub-id>
        <pub-id pub-id-type="pmcid">PMC551552</pub-id></nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Bernstam</surname>
            <given-names>EV</given-names>
          </name>
          <name name-style="western">
            <surname>Herskovic</surname>
            <given-names>JR</given-names>
          </name>
          <name name-style="western">
            <surname>Aphinyanaphongs</surname>
            <given-names>Y</given-names>
          </name>
          <name name-style="western">
            <surname>Aliferis</surname>
            <given-names>CF</given-names>
          </name>
          <name name-style="western">
            <surname>Sriram</surname>
            <given-names>MG</given-names>
          </name>
          <name name-style="western">
            <surname>Hersh</surname>
            <given-names>WR</given-names>
          </name>
        </person-group>
        <article-title>Using citation data to improve retrieval from MEDLINE</article-title>
        <source>J Am Med Inform Assoc</source>  
        <year>2006</year>  
        <month>01</month>  
        <volume>13</volume>  
        <issue>1</issue>  
        <fpage>96</fpage>  
        <lpage>105</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/16221938"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1197/jamia.M1909</pub-id>
        <pub-id pub-id-type="medline">16221938</pub-id>
        <pub-id pub-id-type="pii">M1909</pub-id>
        <pub-id pub-id-type="pmcid">PMC1380202</pub-id></nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Lee</surname>
            <given-names>H</given-names>
          </name>
          <name name-style="western">
            <surname>Grosse</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Ranganath</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Ng</surname>
            <given-names>A</given-names>
          </name>
        </person-group>
        <article-title>Convolutional deep belief networks for scalable unsupervised learning of hierarchical representations</article-title>
        <year>2009</year>  
        <conf-name>Proceedings of the 26th annual international conference on machine learning</conf-name>
        <conf-date>2009</conf-date>
        <conf-loc>Montreal, Quebec, Canada</conf-loc>
        <publisher-name>ACM</publisher-name>
        <fpage>609</fpage>  
        <lpage>616</lpage> </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Lee</surname>
            <given-names>H</given-names>
          </name>
          <name name-style="western">
            <surname>Pham</surname>
            <given-names>P</given-names>
          </name>
          <name name-style="western">
            <surname>Largman</surname>
            <given-names>Y</given-names>
          </name>
          <name name-style="western">
            <surname>Ng</surname>
            <given-names>AY</given-names>
          </name>
        </person-group>
        <article-title>Unsupervised feature learning for audio classification using convolutional deep belief networks</article-title>
        <year>2009</year>  
        <conf-name>Advances in neural information processing systems</conf-name>
        <conf-date>2009</conf-date>
        <conf-loc>Vancouver, Canada</conf-loc>
        <fpage>1096</fpage>  
        <lpage>1104</lpage> </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="web">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Kim</surname>
            <given-names>Y</given-names>
          </name>
        </person-group>
        <source>arXiv</source>  
        <year>2014</year>  
        <access-date>2018-05-30</access-date>
        <comment>Convolutional neural networks for sentence classification 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1408.5882">https://arxiv.org/abs/1408.5882</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="6zoXJPr96"/></comment> </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="web">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Mikolov</surname>
            <given-names>T</given-names>
          </name>
          <name name-style="western">
            <surname>Chen</surname>
            <given-names>K</given-names>
          </name>
          <name name-style="western">
            <surname>Corrado</surname>
            <given-names>G</given-names>
          </name>
          <name name-style="western">
            <surname>Dean</surname>
            <given-names>J</given-names>
          </name>
        </person-group>
        <source>arXiv</source>  
        <year>2013</year>  
        <access-date>2018-05-30</access-date>
        <comment>Efficient estimation of word representations in vector space 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1301.3781">https://arxiv.org/abs/1301.3781</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="6zoXQzoQh"/></comment> </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="web">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Yin</surname>
            <given-names>W</given-names>
          </name>
          <name name-style="western">
            <surname>Kann</surname>
            <given-names>K</given-names>
          </name>
          <name name-style="western">
            <surname>Yu</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Schütze</surname>
            <given-names>H</given-names>
          </name>
        </person-group>
        <source>arXiv</source>  
        <year>2017</year>  
        <access-date>2018-05-30</access-date>
        <comment>Comparative study of CNN and RNN for natural language processing 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1702.01923">https://arxiv.org/abs/1702.01923</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="6zoXVRoAx"/></comment> </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Liberati</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Altman</surname>
            <given-names>DG</given-names>
          </name>
          <name name-style="western">
            <surname>Tetzlaff</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Mulrow</surname>
            <given-names>C</given-names>
          </name>
          <name name-style="western">
            <surname>Gøtzsche</surname>
            <given-names>PC</given-names>
          </name>
          <name name-style="western">
            <surname>Ioannidis</surname>
            <given-names>JPA</given-names>
          </name>
          <name name-style="western">
            <surname>Clarke</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Devereaux</surname>
            <given-names>PJ</given-names>
          </name>
          <name name-style="western">
            <surname>Kleijnen</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Moher</surname>
            <given-names>D</given-names>
          </name>
        </person-group>
        <article-title>The PRISMA statement for reporting systematic reviews and meta-analyses of studies that evaluate healthcare interventions: explanation and elaboration</article-title>
        <source>BMJ</source>  
        <year>2009</year>  
        <volume>339</volume>  
        <fpage>b2700</fpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://www.bmj.com/cgi/pmidlookup?view=long&#38;pmid=19622552"/>
        </comment>  
        <pub-id pub-id-type="medline">19622552</pub-id>
        <pub-id pub-id-type="pmcid">PMC2714672</pub-id></nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Sampson</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Shojania</surname>
            <given-names>KG</given-names>
          </name>
          <name name-style="western">
            <surname>McGowan</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Daniel</surname>
            <given-names>R</given-names>
          </name>
          <name name-style="western">
            <surname>Rader</surname>
            <given-names>T</given-names>
          </name>
          <name name-style="western">
            <surname>Iansavichene</surname>
            <given-names>AE</given-names>
          </name>
          <name name-style="western">
            <surname>Ji</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Ansari</surname>
            <given-names>MT</given-names>
          </name>
          <name name-style="western">
            <surname>Moher</surname>
            <given-names>D</given-names>
          </name>
        </person-group>
        <article-title>Surveillance search techniques identified the need to update systematic reviews</article-title>
        <source>J Clin Epidemiol</source>  
        <year>2008</year>  
        <month>08</month>  
        <volume>61</volume>  
        <issue>8</issue>  
        <fpage>755</fpage>  
        <lpage>62</lpage>  
        <pub-id pub-id-type="doi">10.1016/j.jclinepi.2007.10.003</pub-id>
        <pub-id pub-id-type="medline">18586179</pub-id>
        <pub-id pub-id-type="pii">S0895-4356(07)00365-4</pub-id></nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Elliott</surname>
            <given-names>JH</given-names>
          </name>
          <name name-style="western">
            <surname>Synnot</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Turner</surname>
            <given-names>T</given-names>
          </name>
          <name name-style="western">
            <surname>Simmonds</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Akl</surname>
            <given-names>EA</given-names>
          </name>
          <name name-style="western">
            <surname>McDonald</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Salanti</surname>
            <given-names>G</given-names>
          </name>
          <name name-style="western">
            <surname>Meerpohl</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>MacLehose</surname>
            <given-names>H</given-names>
          </name>
          <name name-style="western">
            <surname>Hilton</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Tovey</surname>
            <given-names>D</given-names>
          </name>
          <name name-style="western">
            <surname>Shemilt</surname>
            <given-names>I</given-names>
          </name>
          <name name-style="western">
            <surname>Thomas</surname>
            <given-names>J</given-names>
          </name>
          <collab>Living Systematic Review Network</collab>
        </person-group>
        <article-title>Living systematic review: 1. Introduction-the why, what, when, and how</article-title>
        <source>J Clin Epidemiol</source>  
        <year>2017</year>  
        <month>11</month>  
        <volume>91</volume>  
        <fpage>23</fpage>  
        <lpage>30</lpage>  
        <pub-id pub-id-type="doi">10.1016/j.jclinepi.2017.08.010</pub-id>
        <pub-id pub-id-type="medline">28912002</pub-id>
        <pub-id pub-id-type="pii">S0895-4356(17)30636-4</pub-id></nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="book">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Shojania</surname>
            <given-names>K</given-names>
          </name>
          <name name-style="western">
            <surname>Sampson</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Ansari</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Ji</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Garritty</surname>
            <given-names>C</given-names>
          </name>
          <name name-style="western">
            <surname>Rader</surname>
            <given-names>T</given-names>
          </name>
          <name name-style="western">
            <surname>Moher</surname>
            <given-names>D</given-names>
          </name>
        </person-group>
        <source>AHRQ Technical Reviews. Updating Systematic Reviews</source>  
        <year>2007</year>  
        <publisher-loc>Rockville, MD</publisher-loc>
        <publisher-name>Agency for Healthcare Research and Quality (US)</publisher-name></nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Del Fiol</surname>
            <given-names>G</given-names>
          </name>
          <name name-style="western">
            <surname>Workman</surname>
            <given-names>TE</given-names>
          </name>
          <name name-style="western">
            <surname>Gorman</surname>
            <given-names>PN</given-names>
          </name>
        </person-group>
        <article-title>Clinical questions raised by clinicians at the point of care: a systematic review</article-title>
        <source>JAMA Intern Med</source>  
        <year>2014</year>  
        <month>05</month>  
        <volume>174</volume>  
        <issue>5</issue>  
        <fpage>710</fpage>  
        <lpage>8</lpage>  
        <pub-id pub-id-type="doi">10.1001/jamainternmed.2014.368</pub-id>
        <pub-id pub-id-type="medline">24663331</pub-id>
        <pub-id pub-id-type="pii">1846630</pub-id></nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Jansen</surname>
            <given-names>B</given-names>
          </name>
          <name name-style="western">
            <surname>Spink</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Saracevic</surname>
            <given-names>T</given-names>
          </name>
        </person-group>
        <article-title>Real life, real users, and real needs: a study and analysis of user queries on the web</article-title>
        <source>Inf Process Manag</source>  
        <year>2000</year>  
        <month>3</month>  
        <volume>36</volume>  
        <issue>2</issue>  
        <fpage>207</fpage>  
        <lpage>227</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1016/S0306-4573(99)00056-4"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1016/S0306-4573(99)00056-4</pub-id></nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Cohen</surname>
            <given-names>AM</given-names>
          </name>
          <name name-style="western">
            <surname>Ambert</surname>
            <given-names>K</given-names>
          </name>
          <name name-style="western">
            <surname>McDonagh</surname>
            <given-names>M</given-names>
          </name>
        </person-group>
        <article-title>Cross-topic learning for work prioritization in systematic review creation and update</article-title>
        <source>J Am Med Inform Assoc</source>  
        <year>2009</year>  
        <volume>16</volume>  
        <issue>5</issue>  
        <fpage>690</fpage>  
        <lpage>704</lpage>  
        <comment>
          <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/19567792"/>
        </comment>  
        <pub-id pub-id-type="doi">10.1197/jamia.M3162</pub-id>
        <pub-id pub-id-type="medline">19567792</pub-id>
        <pub-id pub-id-type="pii">M3162</pub-id>
        <pub-id pub-id-type="pmcid">PMC2744720</pub-id></nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Krause</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Sapp</surname>
            <given-names>B</given-names>
          </name>
          <name name-style="western">
            <surname>Howard</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Zhou</surname>
            <given-names>H</given-names>
          </name>
          <name name-style="western">
            <surname>Toshev</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Duerig</surname>
            <given-names>T</given-names>
          </name>
          <name name-style="western">
            <surname>Philbin</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Fei-Fei</surname>
            <given-names>L</given-names>
          </name>
        </person-group>
        <article-title>The unreasonable effectiveness of noisy data for fine-grained recognition</article-title>
        <year>2016</year>  
        <conf-name>European Conference on Computer Vision</conf-name>
        <conf-date>2016</conf-date>
        <conf-loc>Amsterdam, The Netherlands</conf-loc>
        <fpage>301</fpage>  
        <lpage>320</lpage> </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Lee</surname>
            <given-names>GE</given-names>
          </name>
        </person-group>
        <article-title>A study of convolutional neural networks for clinical document classification in systematic reviews</article-title>
        <source>SysReview at CLEF eHealth</source>  
        <year>2017</year>  
        <conf-name>CEUR Workshop</conf-name>
        <conf-date>2017</conf-date>
        <conf-loc>Dublin, Ireland</conf-loc></nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Hughes</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Li</surname>
            <given-names>I</given-names>
          </name>
          <name name-style="western">
            <surname>Kotoulas</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Suzumura</surname>
            <given-names>T</given-names>
          </name>
        </person-group>
        <article-title>Medical text classification using convolutional neural networks</article-title>
        <source>Stud Health Technol Inform</source>  
        <year>2017</year>  
        <volume>235</volume>  
        <fpage>246</fpage>  
        <lpage>250</lpage>  
        <pub-id pub-id-type="medline">28423791</pub-id></nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Wang</surname>
            <given-names>Y</given-names>
          </name>
          <name name-style="western">
            <surname>Liu</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Rastegar-Mojarad</surname>
            <given-names>M</given-names>
          </name>
          <name name-style="western">
            <surname>Wang</surname>
            <given-names>L</given-names>
          </name>
          <name name-style="western">
            <surname>Shen</surname>
            <given-names>F</given-names>
          </name>
          <name name-style="western">
            <surname>Liu</surname>
            <given-names>F</given-names>
          </name>
          <name name-style="western">
            <surname>Liu</surname>
            <given-names>H</given-names>
          </name>
        </person-group>
        <article-title>Dependency and AMR embeddings for drug-drug interaction extraction from biomedical literature</article-title>
        <year>2017</year>  
        <conf-name>8th ACM International Conference on Bioinformatics, Computational Biology, and Health Informatics</conf-name>
        <conf-date>2017</conf-date>
        <conf-loc>Boston, MA, USA</conf-loc></nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Nguyen</surname>
            <given-names>P</given-names>
          </name>
          <name name-style="western">
            <surname>Tran</surname>
            <given-names>T</given-names>
          </name>
          <name name-style="western">
            <surname>Wickramasinghe</surname>
            <given-names>N</given-names>
          </name>
          <name name-style="western">
            <surname>Venkatesh</surname>
            <given-names>S</given-names>
          </name>
        </person-group>
        <article-title>Deepr: a convolutional net for medical records</article-title>
        <source>IEEE J Biomed Health Inform</source>  
        <year>2017</year>  
        <month>12</month>  
        <volume>21</volume>  
        <issue>1</issue>  
        <fpage>22</fpage>  
        <lpage>30</lpage>  
        <pub-id pub-id-type="doi">10.1109/JBHI.2016.2633963</pub-id>
        <pub-id pub-id-type="medline">27913366</pub-id></nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="web">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Che</surname>
            <given-names>Z</given-names>
          </name>
          <name name-style="western">
            <surname>Cheng</surname>
            <given-names>Y</given-names>
          </name>
          <name name-style="western">
            <surname>Sun</surname>
            <given-names>Z</given-names>
          </name>
          <name name-style="western">
            <surname>Liu</surname>
            <given-names>Y</given-names>
          </name>
        </person-group>
        <source>arXiv</source>  
        <year>2017</year>  
        <access-date>2018-05-30</access-date>
        <comment>Exploiting convolutional neural network for risk prediction with medical feature embedding 
        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1701.07474">https://arxiv.org/abs/1701.07474</ext-link>
        <ext-link ext-link-type="webcite" xlink:href="http://www.webcitation.org/6zoYNlfiE"/></comment> </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Marshall</surname>
            <given-names>I</given-names>
          </name>
          <name name-style="western">
            <surname>Noel-Storr</surname>
            <given-names>A</given-names>
          </name>
          <name name-style="western">
            <surname>Kuiper</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Thomas</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Wallace</surname>
            <given-names>B</given-names>
          </name>
        </person-group>
        <article-title>Machine learning for identifying randomized controlled trials: an evaluation and practitioner's guide</article-title>
        <source>Res Synth Methods</source>  
        <year>2018</year>  
        <month>01</month>  
        <day>04</day>  
        <fpage>1</fpage>  
        <lpage>13</lpage>  
        <pub-id pub-id-type="doi">10.1002/jrsm.1287</pub-id>
        <pub-id pub-id-type="medline">29314757</pub-id></nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Liu</surname>
            <given-names>P</given-names>
          </name>
          <name name-style="western">
            <surname>Qiu</surname>
            <given-names>X</given-names>
          </name>
          <name name-style="western">
            <surname>Huang</surname>
            <given-names>X</given-names>
          </name>
        </person-group>
        <article-title>Recurrent neural network for text classification with multi-task learning</article-title>
        <year>2016</year>  
        <conf-name>25th International Joint Conference on Artificial Intelligence</conf-name>
        <conf-date>2016</conf-date>
        <conf-loc>New York, NY, USA</conf-loc>
        <publisher-name>AAAI Press</publisher-name>
        <fpage>2873</fpage>  
        <lpage>2879</lpage> </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="confproc">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Lai</surname>
            <given-names>S</given-names>
          </name>
          <name name-style="western">
            <surname>Xu</surname>
            <given-names>L</given-names>
          </name>
          <name name-style="western">
            <surname>Liu</surname>
            <given-names>K</given-names>
          </name>
          <name name-style="western">
            <surname>Zhao</surname>
            <given-names>J</given-names>
          </name>
        </person-group>
        <article-title>Recurrent convolutional neural networks for text classification</article-title>
        <year>2015</year>  
        <conf-name>29th AAAI Conference on Artificial Intelligence</conf-name>
        <conf-date>2015</conf-date>
        <conf-loc>Austin, Texas, USA</conf-loc>
        <publisher-name>AAAI Press</publisher-name>
        <fpage>2267</fpage>  
        <lpage>2273</lpage> </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
        <person-group person-group-type="author">
          <name name-style="western">
            <surname>Bergstra</surname>
            <given-names>J</given-names>
          </name>
          <name name-style="western">
            <surname>Bengio</surname>
            <given-names>Y</given-names>
          </name>
        </person-group>
        <article-title>Random search for hyper-parameter optimization</article-title>
        <source>J Mach Learn Res</source>  
        <year>2012</year>  
        <volume>13</volume>  
        <fpage>281</fpage>  
        <lpage>305</lpage> </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
