<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMIR</journal-id>
      <journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id>
      <journal-title>Journal of Medical Internet Research</journal-title>
      <issn pub-type="epub">1438-8871</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v26i1e48996</article-id>
      <article-id pub-id-type="pmid">38214966</article-id>
      <article-id pub-id-type="doi">10.2196/48996</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Automated Paper Screening for Clinical Reviews Using Large Language Models: Data Analysis Study</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>de Azevedo Cardoso</surname>
            <given-names>Taiane</given-names>
          </name>
        </contrib>
        <contrib contrib-type="editor">
          <name>
            <surname>Eysenbach</surname>
            <given-names>Gunther</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Kang</surname>
            <given-names>Tian</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Chatzimina</surname>
            <given-names>Maria</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Bojic</surname>
            <given-names>Iva</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Guo</surname>
            <given-names>Eddie</given-names>
          </name>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Cumming School of Medicine</institution>
            <institution>University of Calgary</institution>
            <addr-line>3330 University Dr NW</addr-line>
            <addr-line>Calgary, AB, T2N 1N4</addr-line>
            <country>Canada</country>
            <phone>1 5879880292</phone>
            <email>eddie.guo@ucalgary.ca</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-7223-0505</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Gupta</surname>
            <given-names>Mehul</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-7931-0666</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Deng</surname>
            <given-names>Jiawen</given-names>
          </name>
          <degrees>BHSc</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-8274-6468</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Park</surname>
            <given-names>Ye-Jean</given-names>
          </name>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0008-1068-8992</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Paget</surname>
            <given-names>Michael</given-names>
          </name>
          <degrees>BFA</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-3322-7661</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author">
          <name name-style="western">
            <surname>Naugler</surname>
            <given-names>Christopher</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-4570-1279</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Cumming School of Medicine</institution>
        <institution>University of Calgary</institution>
        <addr-line>Calgary, AB</addr-line>
        <country>Canada</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Temerty Faculty of Medicine</institution>
        <institution>University of Toronto</institution>
        <addr-line>Toronto, ON</addr-line>
        <country>Canada</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Eddie Guo <email>eddie.guo@ucalgary.ca</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <year>2024</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>12</day>
        <month>1</month>
        <year>2024</year>
      </pub-date>
      <volume>26</volume>
      <elocation-id>e48996</elocation-id>
      <history>
        <date date-type="received">
          <day>14</day>
          <month>5</month>
          <year>2023</year>
        </date>
        <date date-type="rev-request">
          <day>30</day>
          <month>8</month>
          <year>2023</year>
        </date>
        <date date-type="rev-recd">
          <day>30</day>
          <month>8</month>
          <year>2023</year>
        </date>
        <date date-type="accepted">
          <day>28</day>
          <month>9</month>
          <year>2023</year>
        </date>
      </history>
      <copyright-statement>©Eddie Guo, Mehul Gupta, Jiawen Deng, Ye-Jean Park, Michael Paget, Christopher Naugler. Originally published in the Journal of Medical Internet Research (https://www.jmir.org), 12.01.2024.</copyright-statement>
      <copyright-year>2024</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research, is properly cited. The complete bibliographic information, a link to the original publication on https://www.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://www.jmir.org/2024/1/e48996" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>The systematic review of clinical research papers is a labor-intensive and time-consuming process that often involves the screening of thousands of titles and abstracts. The accuracy and efficiency of this process are critical for the quality of the review and subsequent health care decisions. Traditional methods rely heavily on human reviewers, often requiring a significant investment of time and resources.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>This study aims to assess the performance of the OpenAI generative pretrained transformer (GPT) and GPT-4 application programming interfaces (APIs) in accurately and efficiently identifying relevant titles and abstracts from real-world clinical review data sets and comparing their performance against ground truth labeling by 2 independent human reviewers.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>We introduce a novel workflow using the Chat GPT and GPT-4 APIs for screening titles and abstracts in clinical reviews. A Python script was created to make calls to the API with the screening criteria in natural language and a corpus of title and abstract data sets filtered by a minimum of 2 human reviewers. We compared the performance of our model against human-reviewed papers across 6 review papers, screening over 24,000 titles and abstracts.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>Our results show an accuracy of 0.91, a macro <italic>F</italic><sub>1</sub>-score of 0.60, a sensitivity of excluded papers of 0.91, and a sensitivity of included papers of 0.76. The interrater variability between 2 independent human screeners was κ=0.46, and the prevalence and bias-adjusted κ between our proposed methods and the consensus-based human decisions was κ=0.96. On a randomly selected subset of papers, the GPT models demonstrated the ability to provide reasoning for their decisions and corrected their initial decisions upon being asked to explain their reasoning for incorrect classifications.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>Large language models have the potential to streamline the clinical review process, save valuable time and effort for researchers, and contribute to the overall quality of clinical reviews. By prioritizing the workflow and acting as an aid rather than a replacement for researchers and reviewers, models such as GPT-4 can enhance efficiency and lead to more accurate and reliable conclusions in medical research.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>abstract screening</kwd>
        <kwd>Chat GPT</kwd>
        <kwd>classification</kwd>
        <kwd>extract</kwd>
        <kwd>extraction</kwd>
        <kwd>free text</kwd>
        <kwd>GPT</kwd>
        <kwd>GPT-4</kwd>
        <kwd>language model</kwd>
        <kwd>large language models</kwd>
        <kwd>LLM</kwd>
        <kwd>natural language processing</kwd>
        <kwd>NLP</kwd>
        <kwd>nonopioid analgesia</kwd>
        <kwd>review methodology</kwd>
        <kwd>review methods</kwd>
        <kwd>screening</kwd>
        <kwd>systematic review</kwd>
        <kwd>systematic</kwd>
        <kwd>unstructured data</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>Knowledge synthesis, the process of integrating and summarizing relevant studies in the literature to gain an improved understanding of a topic, is a key component in identifying knowledge gaps and informing future research endeavors on a topic of interest [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref2">2</xref>]. Systematic and scoping reviews are among the most commonly used and rigorous forms of knowledge synthesis across multiple disciplines [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref2">2</xref>]. Given that the results from systematic and scoping reviews can inform guidelines, protocols, and decision-making processes, particularly for stakeholders in the realms of health care, the quality of the evidence presented by such reviews can significantly impact generated recommendations [<xref ref-type="bibr" rid="ref3">3</xref>].</p>
      <p>The quality of systematic and scoping reviews is highly dependent on the comprehensiveness of the database searches and the subsequent article screening processes. Overlooking relevant articles during these critical steps can lead to bias [<xref ref-type="bibr" rid="ref4">4</xref>], while including discrepant studies can yield misleading conclusions and increase discordant heterogeneity [<xref ref-type="bibr" rid="ref5">5</xref>]. Thus, guidelines surrounding the conduct of clinical reviews, such as the Cochrane Handbook [<xref ref-type="bibr" rid="ref6">6</xref>], recommend that article screening be completed in duplicate by at least 2 independent reviewers.</p>
      <p>However, duplicate screening effectively doubles the financial and human resources needed to complete systematic reviews compared to single screening. This is especially problematic for small research groups, review projects with broad inclusion criteria (such as network meta-analyses), or time-constrained review projects (such as reviews relating to COVID-19 during the early stages of the pandemic) [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref8">8</xref>]. Additionally, there is often substantial interrater variability in screening decisions, leading to additional time spent on discussions to resolve disagreements [<xref ref-type="bibr" rid="ref9">9</xref>]. Due to the time constraints and wasted resources that are often features of duplicate screening, research studies may also include a more tailored, sensitive search strategy that can lead to missing several articles during the retrieval process [<xref ref-type="bibr" rid="ref10">10</xref>]. Furthermore, although the nuances of each study differ, many systematic reviews may contain thousands of retrieved articles, only to exclude the majority (ie, up to 90%) from the title and abstract screening [<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref11">11</xref>].</p>
      <p>Recent developments in artificial intelligence and machine learning have made it possible to semiautomate or fully automate repetitive steps within the systematic review workflow [<xref ref-type="bibr" rid="ref12">12</xref>-<xref ref-type="bibr" rid="ref14">14</xref>]. Prominent examples of such applications include RobotReviewer [<xref ref-type="bibr" rid="ref15">15</xref>], TrialStreamer [<xref ref-type="bibr" rid="ref16">16</xref>], Research Screener [<xref ref-type="bibr" rid="ref7">7</xref>], DistillerSR [<xref ref-type="bibr" rid="ref17">17</xref>], and Abstrackr [<xref ref-type="bibr" rid="ref18">18</xref>], which are artificial intelligence models developed to extract information from scientific articles or abstracts to judge study quality and infer treatment effects. More specifically, RobotReviewer (2016) was shown to have capabilities similar to those of a human reviewer in assessing the risk of bias, only differing by around 7% in accuracy [<xref ref-type="bibr" rid="ref19">19</xref>]. Similarly, TrialStreamer was a system developed to extract key elements of information from full texts, such as inferring which interventions in a clinical paper worked best, along with comparisons in study outcomes between all relevant extracted full texts of a topic indexed on MEDLINE [<xref ref-type="bibr" rid="ref20">20</xref>].</p>
      <p>While there have been previous attempts at automating the title and abstract screening process, they often involved labor- or computationally-intensive labeling, pretraining, or vectorizations [<xref ref-type="bibr" rid="ref21">21</xref>]. For instance, Rayyan and Abstrackr are 2 free web tools that provide a semiautomated approach to article filtering by using natural language processing algorithms to learn when and where a reviewer includes or excludes an article and subsequently mimic a similar approach [<xref ref-type="bibr" rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref23">23</xref>]. Rayyan also demonstrated high specificity, wherein 98% of all relevant articles were included after the tool had screened 75% of all articles to be analyzed in a study [<xref ref-type="bibr" rid="ref24">24</xref>]. While automation using these tools was found to save time, there was still minimal to substantive risk that there would be missing studies if the tool were fully independent or automated [<xref ref-type="bibr" rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref23">23</xref>]. Furthermore, current programs may use previously standard methods, including n-grams, in comparison to more updated techniques, such as the generative pretrained transformer (GPT) model, which is trained with data from a general domain and does not require additional training to learn embeddings that can represent the semantics and contexts of words in relation to other words [<xref ref-type="bibr" rid="ref25">25</xref>,<xref ref-type="bibr" rid="ref26">26</xref>].</p>
      <p>In this paper, we introduce a novel workflow to screen titles and abstracts for clinical reviews by providing plain language prompts to the publicly available OpenAI GPT application programming interface (API). We aimed to assess GPT models’ ability to accurately and efficiently identify relevant titles and abstracts from real-world clinical review data sets, as well as their ability to explain their decisions and reflect on incorrect classifications. We compare the performance of our model against ground truth labeling by 2 independent human reviewers across 6 review papers in the screening of over 24,000 titles and abstracts.</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Overview</title>
        <p>In our study, we obtained a corpus of title and abstract data sets that have already been filtered by a minimum of 2 human reviewers to train our model (<xref rid="figure1" ref-type="fig">Figure 1</xref>). Subsequently, we created a Python script that provides the screening criteria for each paper to the OpenAI Chat GPT or GPT-4 API, depending on the input token length. We then passed each paper to the API using a consistent instruction prompt to determine whether a paper should be included or excluded based on the contents of its title and abstract. The overall accuracy (computed by dividing papers selected by both GPT and human reviewers by the total number of papers), sensitivity of both included and excluded papers, and interrater reliability through Cohen κ and prevalence-adjusted and bias-adjusted κ (PABAK) were computed against the human-reviewed papers:</p>
        <p>
          <graphic xlink:href="jmir_v26i1e48996_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </p>
        <p>Where <italic>k</italic> is the number of categories and <italic>p<sub>obs</sub></italic> is the observed proportion of agreement between raters. All data and code are available in Mendeley data sets [<xref ref-type="bibr" rid="ref27">27</xref>].</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Overview of the Python script to automate screening with the generative pretrained transformer (GPT) application programming interface (API). LLM: large language model.</p>
          </caption>
          <graphic xlink:href="jmir_v26i1e48996_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Data Collection</title>
        <p>To validate our proposed inclusion and exclusion methodology, we obtained 6 title and abstract screening data sets from different systematic and scoping reviews previously published by the authors of this study, each screened by 2 independent reviewers with conflicts resolved through consensus. These projects cover various medical science topics and vary in size, methodology, and complexity of screening criteria (<xref ref-type="table" rid="table1">Table 1</xref> and Table S1 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> [<xref ref-type="bibr" rid="ref28">28</xref>-<xref ref-type="bibr" rid="ref33">33</xref>]). We obtained the inclusion and exclusion decision from expert reviewers for each title and abstract entry, as well as the criteria provided to the expert reviewers during the screening process. A summary of the review characteristics is presented in <xref ref-type="table" rid="table2">Table 2</xref>.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Included studies and their characteristics. The first 5 data sets are systematic reviews with meta-analyses. The last study is a scoping review.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="320"/>
            <col width="120"/>
            <col width="140"/>
            <col width="240"/>
            <col width="180"/>
            <thead>
              <tr valign="top">
                <td>Study title</td>
                <td>Data set name</td>
                <td>Included studies (538/24,307), n/N</td>
                <td>Study type</td>
                <td>Study topic</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td><italic>Efficacy and Safety of Ivermectin for the Treatment of COVID-19: A Systematic Review and Meta-Analysis</italic> [<xref ref-type="bibr" rid="ref29">29</xref>]</td>
                <td>IVM<sup>a</sup></td>
                <td>35/279</td>
                <td>Systematic review and meta-analysis of randomized and nonrandomized trials</td>
                <td>COVID-19 treatment and antimalarials</td>
              </tr>
              <tr valign="top">
                <td><italic>Efficacy and Safety of Selective Serotonin Reuptake Inhibitors in COVID-19 Management: A Systematic Review and Meta-Analysis</italic> [<xref ref-type="bibr" rid="ref30">30</xref>]</td>
                <td>SSRI<sup>b</sup></td>
                <td>29/3989</td>
                <td>Systematic review and meta-analysis of randomized and nonrandomized trials</td>
                <td>COVID-19 treatment and antidepressants</td>
              </tr>
              <tr valign="top">
                <td><italic>Efficacy of Lopinavir-Ritonavir Combination Therapy for the Treatment of Hospitalized COVID-19 Patients: A Meta-Analysis</italic> [<xref ref-type="bibr" rid="ref31">31</xref>]</td>
                <td>LPVR<sup>c</sup></td>
                <td>91/1456</td>
                <td>Systematic review and meta-analysis of randomized and nonrandomized trials</td>
                <td>COVID-19 treatment and antiretrovirals</td>
              </tr>
              <tr valign="top">
                <td><italic>The Use of Acupuncture in Patients With Raynaud’s Syndrome: A Systematic Review and Meta-Analysis of Randomized Controlled Trials</italic> [<xref ref-type="bibr" rid="ref32">32</xref>]</td>
                <td>RAYNAUDS<sup>d</sup></td>
                <td>6/942</td>
                <td>Systematic review and meta-analysis of randomized and nonrandomized trials</td>
                <td>Raynaud syndrome and acupuncture</td>
              </tr>
              <tr valign="top">
                <td><italic>Comparative Efficacy of Adjuvant Non-Opioid Analgesia in Adult Cardiac Surgical Patients: A Network Meta-Analysis</italic> [<xref ref-type="bibr" rid="ref33">33</xref>]</td>
                <td>NOA<sup>e</sup></td>
                <td>354/14,771</td>
                <td>Systematic review and meta-analysis of randomized and nonrandomized trials</td>
                <td>Postoperative pain and analgesics</td>
              </tr>
              <tr valign="top">
                <td>
                  <italic>Assessing the Research Landscape and Utility of LLMs<sup>f</sup> in the Clinical Setting: Protocol for a Scoping Review</italic>
                  <sup>g</sup>
                </td>
                <td>LLM</td>
                <td>23/2870</td>
                <td>Scoping review</td>
                <td>Machine learning in clinical medicine</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>IVM: ivermectin.</p>
            </fn>
            <fn id="table1fn2">
              <p><sup>b</sup>SSRI: selective serotonin reuptake inhibitor.</p>
            </fn>
            <fn id="table1fn3">
              <p><sup>c</sup>LPVR: lopinavir-ritonavir.</p>
            </fn>
            <fn id="table1fn4">
              <p><sup>d</sup>RAYNAUDS: Raynaud syndrome.</p>
            </fn>
            <fn id="table1fn5">
              <p><sup>e</sup>NOA: nonopioid analgesia.</p>
            </fn>
            <fn id="table1fn6">
              <p><sup>f</sup>LLM: large language model.</p>
            </fn>
            <fn id="table1fn7">
              <p><sup>g</sup>Registered with Open Science Framework [<xref ref-type="bibr" rid="ref28">28</xref>].</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Data formatting for the Python script automating screening with the generative pretrained transformer application programming interface. All non-English characters were removed before analysis.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="330"/>
            <col width="670"/>
            <thead>
              <tr valign="top">
                <td>Data</td>
                <td>Columns</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>df_info</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Dataset Name (str): name of data set</p>
                    </list-item>
                    <list-item>
                      <p>Inclusion Criteria (str): screening inclusion criteria</p>
                    </list-item>
                    <list-item>
                      <p>Exclusion Criteria (str): screening exclusion criteria</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>Dataset<sup>a</sup></td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Title (str): paper title</p>
                    </list-item>
                    <list-item>
                      <p>Abstract (str): paper abstract</p>
                    </list-item>
                  </list>
                </td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table2fn1">
              <p><sup>a</sup>The name of the data set must match Dataset Name in df_info.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>App Creation</title>
        <p>Given a data set, df_info, containing information about inclusion and exclusion criteria of the data sets containing titles and abstracts to be reviewed, the app calls the OpenAI GPT API to classify each paper to be screened as either included or excluded. The app was coded in Python. The prompt given to the GPT API is provided in <xref ref-type="boxed-text" rid="box1">Textbox 1</xref>.</p>
        <boxed-text id="box1" position="float">
          <title>Prompt given to the generative pretrained transformer application programming interface.</title>
          <p><bold>Instructions:</bold> You are a researcher rigorously screening titles and abstracts of scientific papers for inclusion or exclusion in a review paper. Use the criteria below to inform your decision. If any exclusion criteria are met or not all inclusion criteria are met, exclude the article. If all inclusion criteria are met, include the article. Only type “included” or “excluded” to indicate your decision. Do not type anything else.</p>
          <p><bold>Abstract:</bold> {abstract}</p>
          <p><bold>Inclusion criteria:</bold> {inclusion_criteria}</p>
          <p><bold>Exclusion criteria:</bold> {exclusion_criteria}</p>
          <p>
            <bold>Decision:</bold>
          </p>
          <p>Where “Decision:” is whether GPT API includes or excludes the article. Thus, the algorithm is as follows:</p>
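          <p>data_df &#60;- load(df_info)</p>
          <p>for each dataset in data_df:</p>
          <p>&#160;&#160;&#160;&#160;for each row in dataset:</p>
          <p>&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;prompt &#60;- instructions + title + abstract + inclusion criteria + exclusion criteria</p>
          <p>&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;decision &#60;- GPT(prompt)</p>
          <p>&#160;&#160;&#160;&#160;&#160;&#160;&#160;&#160;row[‘decision’] &#60;- decision</p>
          <p>&#160;&#160;&#160;&#160;save(dataset)</p>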
        </boxed-text>
      </sec>
      <sec>
        <title>Assessment and Data Analysis</title>
        <p>After the app was run on all data sets included in our analysis, the following metrics were computed: accuracy, macro <italic>F</italic><sub>1</sub>-score, sensitivity for decision tags, κ, and PABAK. A subset of the results was selected for the GPT models to explain their reasoning. The following prompt was prepended to the original prompt given to the API: “Explain your reasoning for the decision given with the information below.” The human and GPT decisions were appended to the end of the prompt. A subset of incorrect results was selected for GPT to reflect on its incorrect answers. The following prompt was prepended to the original prompt given to the API: “Explain your reasoning for why the decision given was incorrect with the information below.” The human and GPT decisions were appended to the end of the prompt.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <p>The overall accuracy of the GPT models was 0.91, the sensitivity of included papers was 0.76, and the sensitivity of excluded papers was 0.91 (<xref ref-type="table" rid="table3">Table 3</xref> and <xref rid="figure2" ref-type="fig">Figure 2</xref>). On the nonopioid analgesia (NOA) data set (354/14,771 included abstracts), the model ran for 643 minutes and 50.8 seconds, with an approximate cost of US $25. The data set characteristics are detailed in <xref ref-type="table" rid="table1">Table 1</xref>, the model performance is in <xref ref-type="table" rid="table3">Table 3</xref> and visualized in <xref rid="figure2" ref-type="fig">Figure 2</xref>, and the reasoning from GPT is tabulated in <xref ref-type="table" rid="table4">Table 4</xref>.</p>
      <table-wrap position="float" id="table3">
        <label>Table 3</label>
        <caption>
          <p>Performance of generative pretrained transformer (GPT) in screening titles and abstracts against a human reviewer’s ground truth. κ (human) is the agreement between 2 independent human reviewers. κ (screen) is the agreement between GPT and the final papers included and excluded in each data set.</p>
        </caption>
        <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
          <col width="180"/>
          <col width="90"/>
          <col width="130"/>
          <col width="160"/>
          <col width="170"/>
          <col width="90"/>
          <col width="90"/>
          <col width="90"/>
          <thead>
            <tr valign="top">
              <td>Data set</td>
              <td>Accuracy</td>
              <td>Macro <italic>F</italic><sub>1</sub>-score</td>
              <td>Sensitivity (included)</td>
              <td>Sensitivity (excluded)</td>
              <td>κ (human)</td>
              <td>κ (screen)</td>
              <td>PABAK<sup>a</sup></td>
            </tr>
          </thead>
          <tbody>
            <tr valign="top">
              <td>IVM<sup>b</sup></td>
              <td>0.748</td>
              <td>0.610</td>
              <td>0.686</td>
              <td>0.756</td>
              <td>0.72</td>
              <td>0.26</td>
              <td>0.78</td>
            </tr>
            <tr valign="top">
              <td>SSRI<sup>c</sup></td>
              <td>0.846</td>
              <td>0.595</td>
              <td>0.966</td>
              <td>0.949</td>
              <td>0.58</td>
              <td>0.21</td>
              <td>0.99</td>
            </tr>
            <tr valign="top">
              <td>LPVR<sup>d</sup></td>
              <td>0.949</td>
              <td>0.613</td>
              <td>0.593</td>
              <td>0.862</td>
              <td>0.51</td>
              <td>0.25</td>
              <td>0.88</td>
            </tr>
            <tr valign="top">
              <td>RAYNAUDS<sup>e</sup></td>
              <td>0.965</td>
              <td>0.607</td>
              <td>0.833</td>
              <td>0.966</td>
              <td>0.91</td>
              <td>0.22</td>
              <td>0.99</td>
            </tr>
            <tr valign="top">
              <td>NOA<sup>f</sup></td>
              <td>0.895</td>
              <td>0.601</td>
              <td>0.782</td>
              <td>0.898</td>
              <td>0.35</td>
              <td>0.23</td>
              <td>0.95</td>
            </tr>
            <tr valign="top">
              <td>LLM<sup>g</sup></td>
              <td>0.943</td>
              <td>0.594</td>
              <td>1.000</td>
              <td>0.942</td>
              <td>0.69</td>
              <td>0.21</td>
              <td>0.98</td>
            </tr>
            <tr valign="top">
              <td>Total (weighted)</td>
              <td>0.907</td>
              <td>0.600</td>
              <td>0.764</td>
              <td>0.910</td>
              <td>0.46</td>
              <td>0.22</td>
              <td>0.96</td>
            </tr>
            <tr valign="top">
              <td>Total (macro)</td>
              <td>0.891</td>
              <td>0.664</td>
              <td>0.810</td>
              <td>0.900</td>
              <td>0.63</td>
              <td>0.23</td>
              <td>0.93</td>
            </tr>
          </tbody>
        </table>
        <table-wrap-foot>
          <fn id="table3fn1">
            <p><sup>a</sup>PABAK: prevalence-adjusted and bias-adjusted κ.</p>
          </fn>
          <fn id="table3fn2">
            <p><sup>b</sup>IVM: ivermectin.</p>
          </fn>
          <fn id="table3fn3">
            <p><sup>c</sup>SSRI: selective serotonin reuptake inhibitor.</p>
          </fn>
          <fn id="table3fn4">
            <p><sup>d</sup>LPVR: lopinavir-ritonavir.</p>
          </fn>
          <fn id="table3fn5">
            <p><sup>e</sup>RAYNAUDS: Raynaud syndrome.</p>
          </fn>
          <fn id="table3fn6">
            <p><sup>f</sup>NOA: nonopioid analgesia.</p>
          </fn>
          <fn id="table3fn7">
            <p><sup>g</sup>LLM: large language model.</p>
          </fn>
        </table-wrap-foot>
      </table-wrap>
      <fig id="figure2" position="float">
        <label>Figure 2</label>
        <caption>
          <p>(A-F) Confusion matrices for the included and excluded papers for all data sets and (G) the overall performance. IVM: ivermectin; LLM: large language model; LPVR: lopinavir-ritonavir; NOA: nonopioid analgesia; RAYNAUDS: Raynaud syndrome; SSRI: selective serotonin reuptake inhibitor.</p>
        </caption>
        <graphic xlink:href="jmir_v26i1e48996_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
      </fig>
      <table-wrap position="float" id="table4">
        <label>Table 4</label>
        <caption>
          <p>Decision and reasoning of the generative pretrained transformer (GPT) application programming interface for a subset of 4 example titles and abstracts. Full abstracts are in the ivermectin (IVM) data set [<xref ref-type="bibr" rid="ref27">27</xref>].</p>
        </caption>
        <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
          <col width="430"/>
          <col width="570"/>
          <thead>
            <tr valign="top">
              <td>Prompt</td>
              <td>Decision and reasoning</td>
            </tr>
          </thead>
          <tbody>
            <tr valign="top">
              <td>
                <list list-type="bullet">
                  <list-item>
                    <p>Title: Use of Ivermectin in the Treatment of COVID-19: A Pilot Trial</p>
                  </list-item>
                  <list-item>
                    <p>Abstract: Objectives: In this randomized open-label trial pilot study we assessed the antiviral effects and safety of various doses of ivermectin in patients with mild clinical symptoms of COVID-19. Method(s): Patients were randomly assigned to receive standard of care (SOC) treatment at hospital admission...</p>
                  </list-item>
                </list>
              </td>
              <td>
                <list list-type="bullet">
                  <list-item>
                    <p>Human: Included</p>
                  </list-item>
                  <list-item>
                    <p>GPT: Included</p>
                  </list-item>
                  <list-item>
                    <p>Reasoning: The study meets all of the inclusion criteria. It is a randomized controlled trial that compares ivermectin against standard of care treatment in adult COVID-19 patients. The study reports outcomes related to time to viral clearance and adverse events. Additionally, the study is published in a peer-reviewed journal. Therefore, the article should be included in the review paper.</p>
                  </list-item>
                </list>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <list list-type="bullet">
                  <list-item>
                    <p>Title: What Does the COVID-19 Pandemic Mean for the Next Decade of Onchocerciasis Control and Elimination?</p>
                  </list-item>
                  <list-item>
                    <p>Abstract: Background: Mass drug administration (MDA) of ivermectin for onchocerciasis has been disrupted by the coronavirus disease 2019 (COVID-19) pandemic. Mathematical modelling can help predict how missed/delayed MDA will affect short-term epidemiological trends and elimination prospects by 2030...</p>
                  </list-item>
                </list>
              </td>
              <td>
                <list list-type="bullet">
                  <list-item>
                    <p>Human: Excluded</p>
                  </list-item>
                  <list-item>
                    <p>GPT: Excluded</p>
                  </list-item>
                  <list-item>
                    <p>Reasoning: The article does not meet any of the inclusion criteria or exclusion criteria listed. It is not a randomized controlled trial or comparative cohort study, it does not compare ivermectin against standard of care or control group/placebo, it does not include adult COVID-19 inpatients and/or outpatients, and it does not report any of the specified outcomes. Additionally, there is no indication that the article is non-peer-reviewed. Therefore, the article is excluded.</p>
                  </list-item>
                </list>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <list list-type="bullet">
                  <list-item>
                    <p>Title: Use of Ivermectin Is Associated With Lower Mortality in Hospitalized Patients With Coronavirus Disease 2019: The Ivermectin in COVID Nineteen Study</p>
                  </list-item>
                  <list-item>
                    <p>Abstract: Background: Ivermectin was shown to inhibit severe acute respiratory syndrome coronavirus 2 replication in vitro, which has led to off-label use, but clinical efficacy has not been described previously. Research Question: Does ivermectin benefit hospitalized coronavirus disease 2019 (COVID-19) patients?...</p>
                  </list-item>
                </list>
              </td>
              <td>
                <list list-type="bullet">
                  <list-item>
                    <p>Human: Included</p>
                  </list-item>
                  <list-item>
                    <p>GPT: Excluded</p>
                  </list-item>
                  <list-item>
                    <p>Reasoning: The article meets all of the inclusion criteria and none of the exclusion criteria, therefore it should be included in the review paper.</p>
                  </list-item>
                  <list-item>
                    <p>Reflection: The given decision of excluding the article is incorrect. The article meets all the inclusion criteria as it is a non-randomized comparative cohort study that compares ivermectin against standard of care or control group/placebo, includes adult COVID-19 inpatients, and reports mortality incidence. The article does not use ivermectin for prophylaxis of COVID-19 and is a peer-reviewed article. Therefore, the correct decision should be “Included.”</p>
                  </list-item>
                </list>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <list list-type="bullet">
                  <list-item>
                    <p>Title: What to Expect From Different Drugs Used in the Treatment of COVID-19: A Study on Applications and in Vivo and in Vitro Results</p>
                  </list-item>
                  <list-item>
                    <p>Abstract: The end of 2019 was marked by the emergence of a new type of coronavirus (SARS-CoV-2), which has killed more than 240,000 people...</p>
                  </list-item>
                </list>
              </td>
              <td>
                <list list-type="bullet">
                  <list-item>
                    <p>Human: Excluded</p>
                  </list-item>
                  <list-item>
                    <p>GPT: Included</p>
                  </list-item>
                  <list-item>
                    <p>Reasoning: The study is not a randomized controlled trial or a non-randomized comparative cohort study, which is an inclusion criteria. Therefore, the article does not meet all inclusion criteria and must be excluded.</p>
                  </list-item>
                  <list-item>
                    <p>Reflection: As per the given inclusion and exclusion criteria, the decision given is incorrect. The abstract does not mention the use of ivermectin in any of the studies. Therefore, the article cannot be included based on the inclusion criteria.</p>
                  </list-item>
                </list>
              </td>
            </tr>
          </tbody>
        </table>
      </table-wrap>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Overview</title>
        <p>In this study, we assessed the performance of the OpenAI GPT API in the context of clinical review paper inclusion and exclusion criteria selection. We report an overall accuracy of 0.91 and a PABAK of 0.96, indicating a high level of agreement between the app’s decisions and the reference standard. However, the κ was low, ranging from 0.21 to 0.26, largely due to the imbalanced nature of the data sets in this study. The sensitivity of the included papers was 0.76, suggesting that the app needs improvement to correctly identify relevant papers (<xref ref-type="table" rid="table3">Table 3</xref> and <xref rid="figure2" ref-type="fig">Figure 2</xref>). The sensitivity of excluded papers was 0.91, showing promise in excluding irrelevant papers. These results highlight the potential of large language models (LLMs) to support the clinical review process.</p>
      </sec>
      <sec>
        <title>Implications of GPT API’s Performance in the Review Process</title>
        <p>GPT’s performance has several implications for the efficiency and consistency of clinical review paper inclusion and exclusion criteria selection. By prioritizing the workflow and acting as an aid rather than a replacement for researchers and reviewers, GPT and other LLMs have the potential to streamline the review process. This enhanced efficiency could save valuable time and effort for researchers and clinicians, allowing them to focus on more complex tasks and in-depth analysis. Further, the API does not require pretraining or seed articles and can provide reasoning for its decision to either include or exclude papers, an aspect traditional natural language processing algorithms lack in automated or semiautomated paper screening (<xref ref-type="table" rid="table4">Table 4</xref>). Interestingly, upon being asked to explain its reasoning for a subset of incorrect classifications, GPT corrected its initial decision. Ultimately, this increased efficiency, paired with reasoning capabilities, could contribute to the overall quality of clinical reviews, leading to more accurate and reliable conclusions in medical research.</p>
        <p>The use of LLMs in the review process could also promote consistency in the selection of relevant papers. By automating certain aspects of the process and acting as an aid to researchers and clinicians, the model can streamline the review process and help reduce the potential for human error and bias, leading to more objective and reliable results [<xref ref-type="bibr" rid="ref34">34</xref>]. This increased consistency could, in turn, improve the overall quality of the evidence synthesized in clinical reviews, providing a more robust foundation for medical decision-making and the development of clinical guidelines.</p>
        <p>The potential of LLMs as a decision tool becomes particularly valuable when resources are limited. In such situations, LLMs can be used as a first-pass decision aid, streamlining the review process, and allowing human screeners to focus on a smaller, more relevant subset of papers. By automating the initial screening process, LLMs can help reduce the workload for researchers and clinicians, enabling them to allocate their time and effort more efficiently.</p>
        <p>In particular, using the GPT API as a first-pass decision aid can also help mitigate the risk of human error and bias in the initial screening phase, promoting a more objective and consistent selection of papers. While the API’s sensitivity for including relevant papers may not be perfect, its high specificity for excluding irrelevant papers can still provide valuable support in narrowing down the pool of potentially relevant studies [<xref ref-type="bibr" rid="ref10">10</xref>]. This can be particularly beneficial in situations where a large number of papers need to be screened and human resources are scarce [<xref ref-type="bibr" rid="ref35">35</xref>].</p>
      </sec>
      <sec>
        <title>Comparison to Other Tools</title>
        <p>The comparison of our proposed machine learning method to other tools, such as Abstrackr [<xref ref-type="bibr" rid="ref18">18</xref>], DistillerSR [<xref ref-type="bibr" rid="ref17">17</xref>], and RobotAnalyst [<xref ref-type="bibr" rid="ref36">36</xref>], provides evidence of its efficacy and reliability in the context of systematic review processes. On a data set of 24,307 abstracts and titles, our model achieved an accuracy of 0.91 and comparable sensitivity of 0.91 and 0.76 for excluded and included papers, respectively. The significant interrater agreement (PABAK=0.96) between our proposed method and consensus-based human decisions, juxtaposed with the lower interrater agreement between 2 independent human screeners (κ=0.46), emphasizes the model’s robustness. In comparison, Abstrackr reported overall sensitivities of 0.96, 0.79, 0.92, and 0.82 on data sets ranging from 5243 to 47,385 records. When comparing the proportion of missed records across Abstrackr, DistillerSR, and RobotAnalyst on nonpublic medical title and abstract screening data sets, Abstrackr exhibited the lowest proportions of missed records, namely 28%, 5%, and 0%, respectively [<xref ref-type="bibr" rid="ref37">37</xref>]. Conversely, DistillerSR showed a high proportion of missed records, reaching up to 100% in the last data set. RobotAnalyst’s performance fell between the 2, with missed proportions of 70%, 23%, and 100%, respectively. Future work will explore comparative analyses in greater depth and on a broader array of data sets to compare state-of-the-art screening tools.</p>
      </sec>
      <sec>
        <title>Limitations and Challenges in Implementing GPT API in the Review Process</title>
        <p>While the GPT API shows promise in streamlining the review process, it is important to acknowledge its limitations and challenges. One notable limitation is the disparity between the high specificity of 0.91 for excluding papers and the lower sensitivity of 0.76 for including papers. This discrepancy suggests that while the API effectively excludes irrelevant papers, it may not be as proficient in identifying relevant papers for inclusion. This could lead to the omission of important studies in the review process, potentially affecting the comprehensiveness and quality of the final review. Therefore, the GPT API should not be considered a replacement for human expertise. Instead, it should be viewed as a complementary tool that can enhance the efficiency and consistency of the review process. Human screeners should still be involved in the final decision-making process, particularly in cases where the API’s sensitivity for including relevant papers may be insufficient [<xref ref-type="bibr" rid="ref7">7</xref>]. Another limitation arises in the selection of data sets for screening; 3 of the 6 data sets focused on the efficacy of various drugs for COVID-19, potentially limiting the generalizability of the results to other types of studies. Further work will assess GPT on a greater diversity of studies. By combining the strengths of the GPT API with human expertise, researchers can optimize the review process and ensure the accuracy and comprehensiveness of the final review.</p>
      </sec>
      <sec>
        <title>Future Research and Development</title>
        <p>Several avenues for future research and development include refining the GPT API’s performance in the clinical review paper context, incorporating metadata such as study type and year, and exploring few-shot learning approaches. Additionally, training a generator-discriminator model through fine-tuning could improve the API’s performance [<xref ref-type="bibr" rid="ref38">38</xref>]. Expanding the application of the GPT API to other areas of medical research or literature review could also be explored. This would involve large language models for tasks such as identifying and extracting study design information, patient characteristics, and adverse events. As the maximum token length increases with future iterations of the GPT model, screening entire papers may become feasible [<xref ref-type="bibr" rid="ref39">39</xref>]. Furthermore, exploring the use of LLMs to generate clinical review papers could be a promising research direction.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>The GPT API shows potential as a valuable tool for improving the efficiency and consistency of clinical review paper inclusion and exclusion criteria selection. While there are limitations and challenges to its implementation, its performance in this study suggests that it could have a broader impact on clinical review paper writing and medical research. Future research and development should focus on refining the API’s performance, expanding its applications, and exploring its potential in other aspects of clinical research.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Included studies and their inclusion and exclusion criteria.</p>
        <media xlink:href="jmir_v26i1e48996_app1.docx" xlink:title="DOCX File, 20 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">API</term>
          <def>
            <p>application programming interface</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">GPT</term>
          <def>
            <p>generative pretrained transformer</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">LLM</term>
          <def>
            <p>large language model</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">NOA</term>
          <def>
            <p>nonopioid analgesia</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">PABAK</term>
          <def>
            <p>prevalence-adjusted and bias-adjusted κ</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>We would like to acknowledge the following expert reviewers for providing the screening decisions in the review data sets used in this study and for agreeing to make the data sets publicly available: Abhinav Pillai, Mike Paget, Christopher Naugler, Kiyan Heybati, Fangwen Zhou, Myron Moskalyk, Saif Ali, Chi Yi Wong, Wenteng Hou, Umaima Abbas, Qi Kang Zuo, Emma Huang, Daniel Rayner, Cristian Garcia, Harikrishnaa Ba Ramaraju, Oswin Chang, Zachary Silver, Thanansayan Dhivagaran, Elena Zheng, and Shayan Heybati.</p>
    </ack>
    <fn-group>
      <fn fn-type="con">
        <p>EG contributed to conceptualization, methodology, software, formal analysis, investigation, writing the original draft, reviewing, editing, visualization, supervision, and project administration. MG was responsible for conceptualization, methodology, investigation, writing the original draft, reviewing, editing, supervision, and project administration. JD and YJP were involved in methodology, software, formal analysis, investigation, data curation, writing the original draft, and visualization. MP and CN contributed to writing, reviewing, and editing.</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sargeant</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>O'Connor</surname>
              <given-names>AM</given-names>
            </name>
          </person-group>
          <article-title>Scoping reviews, systematic reviews, and meta-analysis: applications in veterinary medicine</article-title>
          <source>Front Vet Sci</source>
          <year>2020</year>
          <volume>7</volume>
          <fpage>11</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/32047759"/>
          </comment>
          <pub-id pub-id-type="doi">10.3389/fvets.2020.00011</pub-id>
          <pub-id pub-id-type="medline">32047759</pub-id>
          <pub-id pub-id-type="pmcid">PMC6997489</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Garritty</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Stevens</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Hamel</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Golfam</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Hutton</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Wolfe</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Knowledge synthesis in evidence-based medicine</article-title>
          <source>Semin Nucl Med</source>
          <year>2019</year>
          <volume>49</volume>
          <issue>2</issue>
          <fpage>136</fpage>
          <lpage>144</lpage>
          <pub-id pub-id-type="doi">10.1053/j.semnuclmed.2018.11.006</pub-id>
          <pub-id pub-id-type="medline">30819393</pub-id>
          <pub-id pub-id-type="pii">S0001-2998(18)30095-3</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Luchini</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Veronese</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Nottegar</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Shin</surname>
              <given-names>JI</given-names>
            </name>
            <name name-style="western">
              <surname>Gentile</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Granziol</surname>
              <given-names>U</given-names>
            </name>
            <name name-style="western">
              <surname>Soysal</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Alexinschi</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Smith</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Solmi</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Assessing the quality of studies in meta-research: review/guidelines on the most important quality assessment tools</article-title>
          <source>Pharm Stat</source>
          <year>2021</year>
          <volume>20</volume>
          <issue>1</issue>
          <fpage>185</fpage>
          <lpage>195</lpage>
          <pub-id pub-id-type="doi">10.1002/pst.2068</pub-id>
          <pub-id pub-id-type="medline">32935459</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gartlehner</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Affengruber</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Titscher</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Noel-Storr</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Dooley</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Ballarini</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>König</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Single-reviewer abstract screening missed 13 percent of relevant studies: a crowd-based, randomized controlled trial</article-title>
          <source>J Clin Epidemiol</source>
          <year>2020</year>
          <volume>121</volume>
          <fpage>20</fpage>
          <lpage>28</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S0895-4356(19)30982-5"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jclinepi.2020.01.005</pub-id>
          <pub-id pub-id-type="medline">31972274</pub-id>
          <pub-id pub-id-type="pii">S0895-4356(19)30982-5</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fletcher</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>What is heterogeneity and is it important?</article-title>
          <source>BMJ</source>
          <year>2007</year>
          <volume>334</volume>
          <issue>7584</issue>
          <fpage>94</fpage>
          <lpage>96</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/17218716"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/bmj.39057.406644.68</pub-id>
          <pub-id pub-id-type="medline">17218716</pub-id>
          <pub-id pub-id-type="pii">334/7584/94</pub-id>
          <pub-id pub-id-type="pmcid">PMC1767262</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <collab>The Cochrane Collaboration</collab>
            <name name-style="western">
              <surname>Higgins</surname>
              <given-names>JPT</given-names>
            </name>
            <name name-style="western">
              <surname>Thomas</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <source>Cochrane Handbook for Systematic Reviews of Interventions</source>
          <year>2021</year>
          <publisher-loc>London</publisher-loc>
          <publisher-name>The Cochrane Collaboration</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chai</surname>
              <given-names>KEK</given-names>
            </name>
            <name name-style="western">
              <surname>Lines</surname>
              <given-names>RLJ</given-names>
            </name>
            <name name-style="western">
              <surname>Gucciardi</surname>
              <given-names>DF</given-names>
            </name>
            <name name-style="western">
              <surname>Ng</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Research screener: a machine learning tool to semi-automate abstract screening for systematic reviews</article-title>
          <source>Syst Rev</source>
          <year>2021</year>
          <volume>10</volume>
          <issue>1</issue>
          <fpage>93</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://systematicreviewsjournal.biomedcentral.com/articles/10.1186/s13643-021-01635-3"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s13643-021-01635-3</pub-id>
          <pub-id pub-id-type="medline">33795003</pub-id>
          <pub-id pub-id-type="pii">10.1186/s13643-021-01635-3</pub-id>
          <pub-id pub-id-type="pmcid">PMC8017894</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Clark</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>McFarlane</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Cleo</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Ramos</surname>
              <given-names>CI</given-names>
            </name>
            <name name-style="western">
              <surname>Marshall</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>The impact of systematic review automation tools on methodological quality and time taken to complete systematic review tasks: case study</article-title>
          <source>JMIR Med Educ</source>
          <year>2021</year>
          <volume>7</volume>
          <issue>2</issue>
          <fpage>e24418</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://mededu.jmir.org/2021/2/e24418/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/24418</pub-id>
          <pub-id pub-id-type="medline">34057072</pub-id>
          <pub-id pub-id-type="pii">v7i2e24418</pub-id>
          <pub-id pub-id-type="pmcid">PMC8204237</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tuijn</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Janssens</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Robben</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>van den Bergh</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Reducing interrater variability and improving health care: a meta-analytical review</article-title>
          <source>J Eval Clin Pract</source>
          <year>2012</year>
          <volume>18</volume>
          <issue>4</issue>
          <fpage>887</fpage>
          <lpage>895</lpage>
          <pub-id pub-id-type="doi">10.1111/j.1365-2753.2011.01705.x</pub-id>
          <pub-id pub-id-type="medline">21726359</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rathbone</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Carter</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Hoffmann</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Glasziou</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Better duplicate detection for systematic reviewers: evaluation of systematic review assistant-deduplication module</article-title>
          <source>Syst Rev</source>
          <year>2015</year>
          <volume>4</volume>
          <issue>1</issue>
          <fpage>6</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://systematicreviewsjournal.biomedcentral.com/articles/10.1186/2046-4053-4-6"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/2046-4053-4-6</pub-id>
          <pub-id pub-id-type="medline">25588387</pub-id>
          <pub-id pub-id-type="pii">2046-4053-4-6</pub-id>
          <pub-id pub-id-type="pmcid">PMC4320616</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Polanin</surname>
              <given-names>JR</given-names>
            </name>
            <name name-style="western">
              <surname>Pigott</surname>
              <given-names>TD</given-names>
            </name>
            <name name-style="western">
              <surname>Espelage</surname>
              <given-names>DL</given-names>
            </name>
            <name name-style="western">
              <surname>Grotpeter</surname>
              <given-names>JK</given-names>
            </name>
          </person-group>
          <article-title>Best practice guidelines for abstract screening large-evidence systematic reviews and meta-analyses</article-title>
          <source>Res Synth Methods</source>
          <year>2019</year>
          <volume>10</volume>
          <issue>3</issue>
          <fpage>330</fpage>
          <lpage>342</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://onlinelibrary.wiley.com/doi/10.1002/jrsm.1354"/>
          </comment>
          <pub-id pub-id-type="doi">10.1002/jrsm.1354</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Marshall</surname>
              <given-names>IJ</given-names>
            </name>
            <name name-style="western">
              <surname>Wallace</surname>
              <given-names>BC</given-names>
            </name>
          </person-group>
          <article-title>Toward systematic review automation: a practical guide to using machine learning tools in research synthesis</article-title>
          <source>Syst Rev</source>
          <year>2019</year>
          <volume>8</volume>
          <issue>1</issue>
          <fpage>163</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://systematicreviewsjournal.biomedcentral.com/articles/10.1186/s13643-019-1074-9"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s13643-019-1074-9</pub-id>
          <pub-id pub-id-type="medline">31296265</pub-id>
          <pub-id pub-id-type="pii">10.1186/s13643-019-1074-9</pub-id>
          <pub-id pub-id-type="pmcid">PMC6621996</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Marshall</surname>
              <given-names>IJ</given-names>
            </name>
            <name name-style="western">
              <surname>Trikalinos</surname>
              <given-names>TA</given-names>
            </name>
            <name name-style="western">
              <surname>Soboczenski</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Yun</surname>
              <given-names>HS</given-names>
            </name>
            <name name-style="western">
              <surname>Kell</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Marshall</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Wallace</surname>
              <given-names>BC</given-names>
            </name>
          </person-group>
          <article-title>In a pilot study, automated real-time systematic review updates were feasible, accurate, and work-saving</article-title>
          <source>J Clin Epidemiol</source>
          <year>2023</year>
          <volume>153</volume>
          <fpage>26</fpage>
          <lpage>33</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S0895-4356(22)00213-X"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jclinepi.2022.08.013</pub-id>
          <pub-id pub-id-type="medline">36150548</pub-id>
          <pub-id pub-id-type="pii">S0895-4356(22)00213-X</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Blaizot</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Veettil</surname>
              <given-names>SK</given-names>
            </name>
            <name name-style="western">
              <surname>Saidoung</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Moreno-Garcia</surname>
              <given-names>CF</given-names>
            </name>
            <name name-style="western">
              <surname>Wiratunga</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Aceves-Martins</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Lai</surname>
              <given-names>NM</given-names>
            </name>
            <name name-style="western">
              <surname>Chaiyakunapruk</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>Using artificial intelligence methods for systematic review in health sciences: a systematic review</article-title>
          <source>Res Synth Methods</source>
          <year>2022</year>
          <volume>13</volume>
          <issue>3</issue>
          <fpage>353</fpage>
          <lpage>362</lpage>
          <pub-id pub-id-type="doi">10.1002/jrsm.1553</pub-id>
          <pub-id pub-id-type="medline">35174972</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Marshall</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Kuiper</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Banner</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Wallace</surname>
              <given-names>BC</given-names>
            </name>
          </person-group>
          <article-title>Automating biomedical evidence synthesis: RobotReviewer</article-title>
          <year>2017</year>
          <conf-name>Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics-System Demonstrations</conf-name>
          <conf-date>July 30-August 4, 2017</conf-date>
          <conf-loc>Vancouver, Canada</conf-loc>
          <publisher-name>Association for Computational Linguistics</publisher-name>
          <fpage>7</fpage>
          <lpage>12</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/P17-4002.pdf"/>
          </comment>
          <pub-id pub-id-type="doi">10.18653/v1/p17-4002</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Marshall</surname>
              <given-names>IJ</given-names>
            </name>
            <name name-style="western">
              <surname>Nye</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Kuiper</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Noel-Storr</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Marshall</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Maclean</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Soboczenski</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Nenkova</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Thomas</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wallace</surname>
              <given-names>BC</given-names>
            </name>
          </person-group>
          <article-title>Trialstreamer: a living, automatically updated database of clinical trial reports</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2020</year>
          <volume>27</volume>
          <issue>12</issue>
          <fpage>1903</fpage>
          <lpage>1912</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/32940710"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamia/ocaa163</pub-id>
          <pub-id pub-id-type="medline">32940710</pub-id>
          <pub-id pub-id-type="pii">5907063</pub-id>
          <pub-id pub-id-type="pmcid">PMC7727361</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hamel</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Kelly</surname>
              <given-names>SE</given-names>
            </name>
            <name name-style="western">
              <surname>Thavorn</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Rice</surname>
              <given-names>DB</given-names>
            </name>
            <name name-style="western">
              <surname>Wells</surname>
              <given-names>GA</given-names>
            </name>
            <name name-style="western">
              <surname>Hutton</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>An evaluation of DistillerSR's machine learning-based prioritization tool for title/abstract screening—impact on reviewer-relevant outcomes</article-title>
          <source>BMC Med Res Methodol</source>
          <year>2020</year>
          <volume>20</volume>
          <issue>1</issue>
          <fpage>256</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedresmethodol.biomedcentral.com/articles/10.1186/s12874-020-01129-1"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12874-020-01129-1</pub-id>
          <pub-id pub-id-type="medline">33059590</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12874-020-01129-1</pub-id>
          <pub-id pub-id-type="pmcid">PMC7559198</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gates</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Johnson</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Hartling</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Technology-assisted title and abstract screening for systematic reviews: a retrospective evaluation of the Abstrackr machine learning tool</article-title>
          <source>Syst Rev</source>
          <year>2018</year>
          <volume>7</volume>
          <issue>1</issue>
          <fpage>45</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://systematicreviewsjournal.biomedcentral.com/articles/10.1186/s13643-018-0707-8"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s13643-018-0707-8</pub-id>
          <pub-id pub-id-type="medline">29530097</pub-id>
          <pub-id pub-id-type="pii">10.1186/s13643-018-0707-8</pub-id>
          <pub-id pub-id-type="pmcid">PMC5848519</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Marshall</surname>
              <given-names>IJ</given-names>
            </name>
            <name name-style="western">
              <surname>Kuiper</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wallace</surname>
              <given-names>BC</given-names>
            </name>
          </person-group>
          <article-title>RobotReviewer: evaluation of a system for automatically assessing bias in clinical trials</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2016</year>
          <volume>23</volume>
          <issue>1</issue>
          <fpage>193</fpage>
          <lpage>201</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/26104742"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamia/ocv044</pub-id>
          <pub-id pub-id-type="medline">26104742</pub-id>
          <pub-id pub-id-type="pii">ocv044</pub-id>
          <pub-id pub-id-type="pmcid">PMC4713900</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nye</surname>
              <given-names>BE</given-names>
            </name>
            <name name-style="western">
              <surname>Nenkova</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Marshall</surname>
              <given-names>IJ</given-names>
            </name>
            <name name-style="western">
              <surname>Wallace</surname>
              <given-names>BC</given-names>
            </name>
          </person-group>
          <article-title>Trialstreamer: mapping and browsing medical evidence in real-time</article-title>
          <source>Proc Conf Assoc Comput Linguist Meet</source>
          <year>2020</year>
          <volume>2020</volume>
          <fpage>63</fpage>
          <lpage>69</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/34136886"/>
          </comment>
          <pub-id pub-id-type="doi">10.18653/v1/2020.acl-demos.9</pub-id>
          <pub-id pub-id-type="medline">34136886</pub-id>
          <pub-id pub-id-type="pmcid">PMC8204713</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Moreno-Garcia</surname>
              <given-names>CF</given-names>
            </name>
            <name name-style="western">
              <surname>Jayne</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Elyan</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Aceves-Martins</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>A novel application of machine learning and zero-shot classification methods for automated abstract screening in systematic reviews</article-title>
          <source>Decis Anal J</source>
          <year>2023</year>
          <volume>6</volume>
          <fpage>100162</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.sciencedirect.com/science/article/pii/S2772662223000024?via%3Dihub"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.dajour.2023.100162</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wallace</surname>
              <given-names>BC</given-names>
            </name>
            <name name-style="western">
              <surname>Trikalinos</surname>
              <given-names>TA</given-names>
            </name>
            <name name-style="western">
              <surname>Lau</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Brodley</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Schmid</surname>
              <given-names>CH</given-names>
            </name>
          </person-group>
          <article-title>Semi-automated screening of biomedical citations for systematic reviews</article-title>
          <source>BMC Bioinformatics</source>
          <year>2010</year>
          <volume>11</volume>
          <fpage>55</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-11-55"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/1471-2105-11-55</pub-id>
          <pub-id pub-id-type="medline">20102628</pub-id>
          <pub-id pub-id-type="pii">1471-2105-11-55</pub-id>
          <pub-id pub-id-type="pmcid">PMC2824679</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ouzzani</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Hammady</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Fedorowicz</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Elmagarmid</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Rayyan—a web and mobile app for systematic reviews</article-title>
          <source>Syst Rev</source>
          <year>2016</year>
          <volume>5</volume>
          <issue>1</issue>
          <fpage>210</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://systematicreviewsjournal.biomedcentral.com/articles/10.1186/s13643-016-0384-4"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s13643-016-0384-4</pub-id>
          <pub-id pub-id-type="medline">27919275</pub-id>
          <pub-id pub-id-type="pii">10.1186/s13643-016-0384-4</pub-id>
          <pub-id pub-id-type="pmcid">PMC5139140</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Olofsson</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Brolund</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Hellberg</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Silverstein</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Stenström</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Österberg</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Dagerhamn</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Can abstract screening workload be reduced using text mining? User experiences of the tool Rayyan</article-title>
          <source>Res Synth Methods</source>
          <year>2017</year>
          <volume>8</volume>
          <issue>3</issue>
          <fpage>275</fpage>
          <lpage>280</lpage>
          <pub-id pub-id-type="doi">10.1002/jrsm.1237</pub-id>
          <pub-id pub-id-type="medline">28374510</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Shree</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>The journey of Open AI GPT models</article-title>
          <source>Medium</source>
          <year>2020</year>
          <access-date>2023-04-19</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medium.com/walmartglobaltech/the-journey-of-open-ai-gpt-models-32d95b7b7fb2">https://medium.com/walmartglobaltech/the-journey-of-open-ai-gpt-models-32d95b7b7fb2</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>O'Mara-Eves</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Thomas</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>McNaught</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Miwa</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Ananiadou</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Using text mining for study identification in systematic reviews: a systematic review of current approaches</article-title>
          <source>Syst Rev</source>
          <year>2015</year>
          <volume>4</volume>
          <issue>1</issue>
          <fpage>5</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://systematicreviewsjournal.biomedcentral.com/articles/10.1186/2046-4053-4-5"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/2046-4053-4-5</pub-id>
          <pub-id pub-id-type="medline">25588314</pub-id>
          <pub-id pub-id-type="pii">2046-4053-4-5</pub-id>
          <pub-id pub-id-type="pmcid">PMC4320539</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Guo</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Gupta</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Deng</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Park</surname>
              <given-names>YJ</given-names>
            </name>
            <name name-style="western">
              <surname>Paget</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Naugler</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Automated paper screening for clinical reviews using large language models</article-title>
          <source>Mendeley Data</source>
          <year>2023</year>
          <access-date>2023-12-15</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://data.mendeley.com/datasets/np79tmhkh5/1">https://data.mendeley.com/datasets/np79tmhkh5/1</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="web">
          <article-title>Assessing the research landscape and utility of LLMs in the clinical setting: protocol for a scoping review</article-title>
          <source>OSF Registries</source>
          <access-date>2023-12-15</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://osf.io/498k6">https://osf.io/498k6</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Deng</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Ali</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Heybati</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Hou</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Wong</surname>
              <given-names>CY</given-names>
            </name>
          </person-group>
          <article-title>Efficacy and safety of ivermectin for the treatment of COVID-19: a systematic review and meta-analysis</article-title>
          <source>QJM</source>
          <year>2021</year>
          <volume>114</volume>
          <issue>10</issue>
          <fpage>721</fpage>
          <lpage>732</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/34570241"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/qjmed/hcab247</pub-id>
          <pub-id pub-id-type="medline">34570241</pub-id>
          <pub-id pub-id-type="pii">6375958</pub-id>
          <pub-id pub-id-type="pmcid">PMC8500108</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Deng</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Rayner</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Ramaraju</surname>
              <given-names>HB</given-names>
            </name>
            <name name-style="western">
              <surname>Abbas</surname>
              <given-names>U</given-names>
            </name>
            <name name-style="western">
              <surname>Garcia</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Heybati</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Park</surname>
              <given-names>YJ</given-names>
            </name>
            <name name-style="western">
              <surname>Moskalyk</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Efficacy and safety of selective serotonin reuptake inhibitors in COVID-19 management: a systematic review and meta-analysis</article-title>
          <source>Clin Microbiol Infect</source>
          <year>2023</year>
          <volume>29</volume>
          <issue>5</issue>
          <fpage>578</fpage>
          <lpage>586</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/36657488"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.cmi.2023.01.010</pub-id>
          <pub-id pub-id-type="medline">36657488</pub-id>
          <pub-id pub-id-type="pii">S1198-743X(23)00032-0</pub-id>
          <pub-id pub-id-type="pmcid">PMC9841740</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Deng</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Hou</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Heybati</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Ali</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Silver</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Dhivagaran</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Ramaraju</surname>
              <given-names>HB</given-names>
            </name>
            <name name-style="western">
              <surname>Wong</surname>
              <given-names>CY</given-names>
            </name>
            <name name-style="western">
              <surname>Zuo</surname>
              <given-names>QK</given-names>
            </name>
            <name name-style="western">
              <surname>Lapshina</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Mellett</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Efficacy of lopinavir-ritonavir combination therapy for the treatment of hospitalized COVID-19 patients: a meta-analysis</article-title>
          <source>Future Virol</source>
          <year>2021</year>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/35145560"/>
          </comment>
          <pub-id pub-id-type="doi">10.2217/fvl-2021-0066</pub-id>
          <pub-id pub-id-type="medline">35145560</pub-id>
          <pub-id pub-id-type="pmcid">PMC8815807</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Zheng</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Deng</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>The use of acupuncture in patients with Raynaud's syndrome: a systematic review and meta-analysis of randomized controlled trials</article-title>
          <source>Acupunct Med</source>
          <year>2023</year>
          <volume>41</volume>
          <issue>2</issue>
          <fpage>63</fpage>
          <lpage>72</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://journals.sagepub.com/doi/abs/10.1177/09645284221076504?url_ver=Z39.88-2003&#38;rfr_id=ori:rid:crossref.org&#38;rfr_dat=cr_pub%20%200pubmed"/>
          </comment>
          <pub-id pub-id-type="doi">10.1177/09645284221076504</pub-id>
          <pub-id pub-id-type="medline">35608095</pub-id>
          <pub-id pub-id-type="pmcid">PMC10115941</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Heybati</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Lynn</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Deng</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Ali</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Hou</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Heybati</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Tzanis</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Krever</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Mughal</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Ramakrishna</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Comparative efficacy of adjuvant nonopioid analgesia in adult cardiac surgical patients: a network meta-analysis</article-title>
          <source>J Cardiothorac Vasc Anesth</source>
          <year>2023</year>
          <volume>37</volume>
          <issue>7</issue>
          <fpage>1169</fpage>
          <lpage>1178</lpage>
          <pub-id pub-id-type="doi">10.1053/j.jvca.2023.03.018</pub-id>
          <pub-id pub-id-type="medline">37088644</pub-id>
          <pub-id pub-id-type="pii">S1053-0770(23)00188-X</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Liang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Feng</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>He</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Pan</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Automation of literature screening using machine learning in medical evidence synthesis: a diagnostic test accuracy systematic review protocol</article-title>
          <source>Syst Rev</source>
          <year>2022</year>
          <volume>11</volume>
          <issue>1</issue>
          <fpage>11</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://systematicreviewsjournal.biomedcentral.com/articles/10.1186/s13643-021-01881-5"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s13643-021-01881-5</pub-id>
          <pub-id pub-id-type="medline">35031074</pub-id>
          <pub-id pub-id-type="pii">10.1186/s13643-021-01881-5</pub-id>
          <pub-id pub-id-type="pmcid">PMC8760775</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>van de Schoot</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>de Bruin</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Schram</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Zahedi</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>de Boer</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Weijdema</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Kramer</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Huijts</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Hoogerwerf</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Ferdinands</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Harkema</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Willemsen</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Ma</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Fang</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Hindriks</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Tummers</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Oberski</surname>
              <given-names>DL</given-names>
            </name>
          </person-group>
          <article-title>An open source machine learning framework for efficient and transparent systematic reviews</article-title>
          <source>Nat Mach Intell</source>
          <year>2021</year>
          <volume>3</volume>
          <issue>2</issue>
          <fpage>125</fpage>
          <lpage>133</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.nature.com/articles/s42256-020-00287-7"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s42256-020-00287-7</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Przybyła</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Brockmeier</surname>
              <given-names>AJ</given-names>
            </name>
            <name name-style="western">
              <surname>Kontonatsios</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Le Pogam</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>McNaught</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>von Elm</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Nolan</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Ananiadou</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Prioritising references for systematic reviews with RobotAnalyst: a user study</article-title>
          <source>Res Synth Methods</source>
          <year>2018</year>
          <volume>9</volume>
          <issue>3</issue>
          <fpage>470</fpage>
          <lpage>488</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/29956486"/>
          </comment>
          <pub-id pub-id-type="doi">10.1002/jrsm.1311</pub-id>
          <pub-id pub-id-type="medline">29956486</pub-id>
          <pub-id pub-id-type="pmcid">PMC6175382</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gates</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Guitard</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Pillay</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Elliott</surname>
              <given-names>SA</given-names>
            </name>
            <name name-style="western">
              <surname>Dyson</surname>
              <given-names>MP</given-names>
            </name>
            <name name-style="western">
              <surname>Newton</surname>
              <given-names>AS</given-names>
            </name>
            <name name-style="western">
              <surname>Hartling</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Performance and usability of machine learning for screening in systematic reviews: a comparative evaluation of three tools</article-title>
          <source>AHRQ Methods for Effective Health Care</source>
          <year>2019</year>
          <publisher-loc>Rockville, MD</publisher-loc>
          <publisher-name>Agency for Healthcare Research and Quality (US)</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Schade</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Fine-tuning a classifier to improve truthfulness</article-title>
          <source>OpenAI</source>
          <access-date>2023-04-20</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://help.openai.com/en/articles/5528730-fine-tuning-a-classifier-to-improve-truthfulness">https://help.openai.com/en/articles/5528730-fine-tuning-a-classifier-to-improve-truthfulness</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Joshua</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>What is the difference between the GPT-4 models?</article-title>
          <source>OpenAI</source>
          <access-date>2023-04-19</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://help.openai.com/en/articles/7127966-what-is-the-difference-between-the-gpt-4-models">https://help.openai.com/en/articles/7127966-what-is-the-difference-between-the-gpt-4-models</ext-link>
          </comment>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
