<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="brief-report" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMIR</journal-id>
      <journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id>
      <journal-title>Journal of Medical Internet Research</journal-title>
      <issn pub-type="epub">1438-8871</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v27i1e66220</article-id>
      <article-id pub-id-type="pmid">39761554</article-id>
      <article-id pub-id-type="doi">10.2196/66220</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Short Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Short Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Two-Layer Retrieval-Augmented Generation Framework for Low-Resource Medical Question Answering Using Reddit Data: Proof-of-Concept Study</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Mavragani</surname>
            <given-names>Amaryllis</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Xiong</surname>
            <given-names>Guangzhi</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Jeong</surname>
            <given-names>Minbyul</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" corresp="yes" equal-contrib="yes">
          <name name-style="western">
            <surname>Das</surname>
            <given-names>Sudeshna</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff01" ref-type="aff">1</xref>
          <address>
            <institution>Department of Biomedical Informatics</institution>
            <institution>School of Medicine</institution>
            <institution>Emory University</institution>
            <addr-line>101 Woodruff Circle</addr-line>
            <addr-line>Atlanta, GA, 30322</addr-line>
            <country>United States</country>
            <phone>1 4047270229</phone>
            <email>sudeshna.das@emory.edu</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-2112-6986</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Ge</surname>
            <given-names>Yao</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff01" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-3323-7130</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Guo</surname>
            <given-names>Yuting</given-names>
          </name>
          <degrees>MS</degrees>
          <xref rid="aff01" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-8919-0888</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Rajwal</surname>
            <given-names>Swati</given-names>
          </name>
          <degrees>MTech</degrees>
          <xref rid="aff02" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-3826-5069</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Hairston</surname>
            <given-names>JaMor</given-names>
          </name>
          <degrees>MS</degrees>
          <xref rid="aff01" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-6069-5869</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author">
          <name name-style="western">
            <surname>Powell</surname>
            <given-names>Jeanne</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff01" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-3494-2376</ext-link>
        </contrib>
        <contrib id="contrib7" contrib-type="author">
          <name name-style="western">
            <surname>Walker</surname>
            <given-names>Drew</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff01" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-4216-2396</ext-link>
        </contrib>
        <contrib id="contrib8" contrib-type="author">
          <name name-style="western">
            <surname>Peddireddy</surname>
            <given-names>Snigdha</given-names>
          </name>
          <degrees>MPH</degrees>
          <xref rid="aff03" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-2972-1122</ext-link>
        </contrib>
        <contrib id="contrib9" contrib-type="author">
          <name name-style="western">
            <surname>Lakamana</surname>
            <given-names>Sahithi</given-names>
          </name>
          <degrees>MS</degrees>
          <xref rid="aff01" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-1304-7484</ext-link>
        </contrib>
        <contrib id="contrib10" contrib-type="author">
          <name name-style="western">
            <surname>Bozkurt</surname>
            <given-names>Selen</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff01" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-1234-2158</ext-link>
        </contrib>
        <contrib id="contrib11" contrib-type="author">
          <name name-style="western">
            <surname>Reyna</surname>
            <given-names>Matthew</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff01" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-4688-7965</ext-link>
        </contrib>
        <contrib id="contrib12" contrib-type="author">
          <name name-style="western">
            <surname>Sameni</surname>
            <given-names>Reza</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff01" ref-type="aff">1</xref>
          <xref rid="aff04" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-4913-6825</ext-link>
        </contrib>
        <contrib id="contrib13" contrib-type="author">
          <name name-style="western">
            <surname>Xiao</surname>
            <given-names>Yunyu</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff05" ref-type="aff">5</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-0479-1781</ext-link>
        </contrib>
        <contrib id="contrib14" contrib-type="author">
          <name name-style="western">
            <surname>Kim</surname>
            <given-names>Sangmi</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff06" ref-type="aff">6</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-1761-4696</ext-link>
        </contrib>
        <contrib id="contrib15" contrib-type="author">
          <name name-style="western">
            <surname>Chandler</surname>
            <given-names>Rasheeta</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff06" ref-type="aff">6</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-2021-6346</ext-link>
        </contrib>
        <contrib id="contrib16" contrib-type="author">
          <name name-style="western">
            <surname>Hernandez</surname>
            <given-names>Natalie</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff07" ref-type="aff">7</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-8911-6613</ext-link>
        </contrib>
        <contrib id="contrib17" contrib-type="author">
          <name name-style="western">
            <surname>Mowery</surname>
            <given-names>Danielle</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff08" ref-type="aff">8</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-3802-4457</ext-link>
        </contrib>
        <contrib id="contrib18" contrib-type="author">
          <name name-style="western">
            <surname>Wightman</surname>
            <given-names>Rachel</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff09" ref-type="aff">9</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-6141-1776</ext-link>
        </contrib>
        <contrib id="contrib19" contrib-type="author">
          <name name-style="western">
            <surname>Love</surname>
            <given-names>Jennifer</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff10" ref-type="aff">10</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-5882-4390</ext-link>
        </contrib>
        <contrib id="contrib20" contrib-type="author">
          <name name-style="western">
            <surname>Spadaro</surname>
            <given-names>Anthony</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff11" ref-type="aff">11</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-0941-4651</ext-link>
        </contrib>
        <contrib id="contrib21" contrib-type="author">
          <name name-style="western">
            <surname>Perrone</surname>
            <given-names>Jeanmarie</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff12" ref-type="aff">12</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-7073-9060</ext-link>
        </contrib>
        <contrib id="contrib22" contrib-type="author">
          <name name-style="western">
            <surname>Sarker</surname>
            <given-names>Abeed</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff01" ref-type="aff">1</xref>
          <xref rid="aff04" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-7358-544X</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff01">
        <label>1</label>
        <institution>Department of Biomedical Informatics</institution>
        <institution>School of Medicine</institution>
        <institution>Emory University</institution>
        <addr-line>Atlanta, GA</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff02">
        <label>2</label>
        <institution>Department of Computer Science and Informatics</institution>
        <institution>Emory University</institution>
        <addr-line>Atlanta, GA</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff03">
        <label>3</label>
        <institution>Department of Behavioral, Social &#38; Health Education Sciences</institution>
        <institution>Rollins School of Public Health</institution>
        <institution>Emory University</institution>
        <addr-line>Atlanta, GA</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff04">
        <label>4</label>
        <institution>Department of Biomedical Engineering</institution>
        <institution>Georgia Institute of Technology and Emory University</institution>
        <addr-line>Atlanta, GA</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff05">
        <label>5</label>
        <institution>Department of Population Health Sciences</institution>
        <institution>Weill Cornell Medicine</institution>
        <addr-line>New York, NY</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff06">
        <label>6</label>
        <institution>Nell Hodgson Woodruff School of Nursing</institution>
        <institution>Emory University</institution>
        <addr-line>Atlanta, GA</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff07">
        <label>7</label>
        <institution>Center for Maternal Health Equity</institution>
        <institution>Morehouse School of Medicine</institution>
        <addr-line>Atlanta, GA</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff08">
        <label>8</label>
        <institution>Department of Biostatistics, Epidemiology and Informatics</institution>
        <institution>Perelman School of Medicine</institution>
        <institution>University of Pennsylvania</institution>
        <addr-line>Philadelphia, PA</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff09">
        <label>9</label>
        <institution>Department of Emergency Medicine</institution>
        <institution>Warren Alpert Medical School of Brown University</institution>
        <addr-line>Providence, RI</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff10">
        <label>10</label>
        <institution>Department of Emergency Medicine</institution>
        <institution>Icahn School of Medicine at Mount Sinai</institution>
        <addr-line>New York, NY</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff11">
        <label>11</label>
        <institution>Department of Emergency Medicine</institution>
        <institution>Rutgers New Jersey Medical School</institution>
        <addr-line>Newark, NJ</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff12">
        <label>12</label>
        <institution>Department of Emergency Medicine</institution>
        <institution>Perelman School of Medicine at the University of Pennsylvania</institution>
        <addr-line>Philadelphia, PA</addr-line>
        <country>United States</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Sudeshna Das <email>sudeshna.das@emory.edu</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <year>2025</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>6</day>
        <month>1</month>
        <year>2025</year>
      </pub-date>
      <volume>27</volume>
      <elocation-id>e66220</elocation-id>
      <history>
        <date date-type="received">
          <day>6</day>
          <month>9</month>
          <year>2024</year>
        </date>
        <date date-type="rev-request">
          <day>26</day>
          <month>9</month>
          <year>2024</year>
        </date>
        <date date-type="rev-recd">
          <day>30</day>
          <month>10</month>
          <year>2024</year>
        </date>
        <date date-type="accepted">
          <day>5</day>
          <month>12</month>
          <year>2024</year>
        </date>
      </history>
      <copyright-statement>©Sudeshna Das, Yao Ge, Yuting Guo, Swati Rajwal, JaMor Hairston, Jeanne Powell, Drew Walker, Snigdha Peddireddy, Sahithi Lakamana, Selen Bozkurt, Matthew Reyna, Reza Sameni, Yunyu Xiao, Sangmi Kim, Rasheeta Chandler, Natalie Hernandez, Danielle Mowery, Rachel Wightman, Jennifer Love, Anthony Spadaro, Jeanmarie Perrone, Abeed Sarker. Originally published in the Journal of Medical Internet Research (https://www.jmir.org), 06.01.2025.</copyright-statement>
      <copyright-year>2025</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research (ISSN 1438-8871), is properly cited. The complete bibliographic information, a link to the original publication on https://www.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://www.jmir.org/2025/1/e66220" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>The increasing use of social media to share lived and living experiences of substance use presents a unique opportunity to obtain information on side effects, use patterns, and opinions on novel psychoactive substances. However, due to the large volume of data, obtaining useful insights through natural language processing technologies such as large language models is challenging.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>This paper aims to develop a retrieval-augmented generation (RAG) architecture for medical question answering pertaining to clinicians’ queries on emerging issues associated with health-related topics, using user-generated medical information on social media.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>We proposed a two-layer RAG framework for query-focused answer generation and evaluated a proof of concept for the framework in the context of query-focused summary generation from social media forums, focusing on emerging drug-related information. Our modular framework generates individual summaries followed by an aggregated summary to answer medical queries from large amounts of user-generated social media data in an efficient manner. We compared the performance of a quantized large language model (Nous-Hermes-2-7B-DPO), deployable in low-resource settings, with GPT-4. For this proof-of-concept study, we used user-generated data from Reddit to answer clinicians’ questions on the use of xylazine and ketamine.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>Our framework achieves comparable median scores in terms of relevance, length, hallucination, coverage, and coherence when evaluated using GPT-4 and Nous-Hermes-2-7B-DPO, evaluated for 20 queries with 76 samples. There was no statistically significant difference between GPT-4 and Nous-Hermes-2-7B-DPO for coverage (Mann-Whitney <italic>U</italic>=733.0; <italic>n</italic><sub>1</sub>=37; <italic>n</italic><sub>2</sub>=39; <italic>P</italic>=.89 two-tailed), coherence (<italic>U</italic>=670.0; <italic>n</italic><sub>1</sub>=37; <italic>n</italic><sub>2</sub>=39; <italic>P</italic>=.49 two-tailed), relevance (<italic>U</italic>=662.0; <italic>n</italic><sub>1</sub>=37; <italic>n</italic><sub>2</sub>=39; <italic>P</italic>=.15 two-tailed), length (<italic>U</italic>=672.0; <italic>n</italic><sub>1</sub>=37; <italic>n</italic><sub>2</sub>=39; <italic>P</italic>=.55 two-tailed), and hallucination (<italic>U</italic>=859.0; <italic>n</italic><sub>1</sub>=37; <italic>n</italic><sub>2</sub>=39; <italic>P</italic>=.01 two-tailed). A statistically significant difference was noted for the Coleman-Liau Index (<italic>U</italic>=307.5; <italic>n</italic><sub>1</sub>=20; <italic>n</italic><sub>2</sub>=16; <italic>P</italic>&#60;.001 two-tailed).</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>Our RAG framework can effectively answer medical questions about targeted topics and can be deployed in resource-constrained settings.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>retrieval-augmented generation</kwd>
        <kwd>substance use</kwd>
        <kwd>social media</kwd>
        <kwd>large language models</kwd>
        <kwd>natural language processing</kwd>
        <kwd>artificial intelligence</kwd>
        <kwd>GPT</kwd>
        <kwd>psychoactive substance</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>Large language models (LLMs) present opportunities for solving complex biomedical natural language processing problems, such as medical question answering (MQA). However, operational challenges (eg, high computational resource requirements) hinder their real-life deployment and use. Another issue with LLM-generated text for MQA is “hallucination”: generated text that is plausible-sounding but nonsensical or incorrect [<xref ref-type="bibr" rid="ref1">1</xref>]. Chain-of-thought prompting [<xref ref-type="bibr" rid="ref2">2</xref>], self-reflection [<xref ref-type="bibr" rid="ref1">1</xref>], and retrieval-augmented generation (RAG) are forerunners in mitigating hallucination. RAG also aids in constraining generated texts and improves in-context learning [<xref ref-type="bibr" rid="ref3">3</xref>]. LLMs in RAG frameworks have been used in the biomedical domain owing to the need for timely, accurate, and transparent responses [<xref ref-type="bibr" rid="ref4">4</xref>]. As LLMs become increasingly integrated into clinical practice [<xref ref-type="bibr" rid="ref5">5</xref>], it is important to ensure their operability in low-resource settings [<xref ref-type="bibr" rid="ref6">6</xref>] while generating accurate and coherent texts.</p>
      <p>We present a proof-of-concept study for a two-layer RAG framework for MQA that ingests user-generated medical information from Reddit. We used smaller, quantized, open-source LLMs that can run on personal computers without specialized hardware, allowing our framework to be used in low-resource settings, thus ensuring equitable access to timely medical information.</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Study Design</title>
        <p>We evaluated our proof-of-concept study in a setting where copious amounts of data are available for a topic but gathering insights and answering questions require substantial manual work—the topic of emerging drugs from Reddit. Reddit has ~52 million daily active users, is commonly used to study emerging medical themes [<xref ref-type="bibr" rid="ref7">7</xref>], and features numerous discussions on the nonmedical uses of substances. Recently, Reddit data have been leveraged to study novel psychoactive substances since such information is not typically available elsewhere. We chose two substances that have gained attention recently—xylazine (because of its increasing impact and association with the US opioid crisis) and ketamine (because of its recent popularity as a treatment for depression). We collected all available data (~2.5 billion posts) from Reddit until December 31, 2023, and extracted all posts mentioning xylazine (n=177,684) and ketamine (n=7699) for our retrieval engine. Based on clinician-driven interests, we formulated 20 queries (<xref ref-type="table" rid="table1">Table 1</xref>).</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Queries used for evaluating the framework.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="250"/>
            <col width="750"/>
            <thead>
              <tr valign="top">
                <td>Query ID</td>
                <td>Query</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>1</td>
                <td>What are the side effects of xylazine?</td>
              </tr>
              <tr valign="top">
                <td>2</td>
                <td>What does xylazine do to the skin?</td>
              </tr>
              <tr valign="top">
                <td>3</td>
                <td>How does xylazine impact rehab?</td>
              </tr>
              <tr valign="top">
                <td>4</td>
                <td>What is xylazine withdrawal like?</td>
              </tr>
              <tr valign="top">
                <td>5</td>
                <td>What drugs contain xylazine?</td>
              </tr>
              <tr valign="top">
                <td>6</td>
                <td>What treatments work for xylazine?</td>
              </tr>
              <tr valign="top">
                <td>7</td>
                <td>What drugs are mixed or cut with xylazine?</td>
              </tr>
              <tr valign="top">
                <td>8</td>
                <td>What areas of the United States are impacted by xylazine?</td>
              </tr>
              <tr valign="top">
                <td>9</td>
                <td>How is xylazine different from pure heroin?</td>
              </tr>
              <tr valign="top">
                <td>10</td>
                <td>What is the general sentiment associated with xylazine?</td>
              </tr>
              <tr valign="top">
                <td>11</td>
                <td>Does narcan or naloxone work for xylazine overdose?</td>
              </tr>
              <tr valign="top">
                <td>12</td>
                <td>What are the side effects of ketamine?</td>
              </tr>
              <tr valign="top">
                <td>13</td>
                <td>What is ketamine withdrawal like?</td>
              </tr>
              <tr valign="top">
                <td>14</td>
                <td>What are k cramps like?</td>
              </tr>
              <tr valign="top">
                <td>15</td>
                <td>How do the users describe k hole?</td>
              </tr>
              <tr valign="top">
                <td>16</td>
                <td>Does ketamine work for depression?</td>
              </tr>
              <tr valign="top">
                <td>17</td>
                <td>What drugs are ketamine coused with recreationally?</td>
              </tr>
              <tr valign="top">
                <td>18</td>
                <td>Is ketamine effective for the treatment of suicidal behavior?</td>
              </tr>
              <tr valign="top">
                <td>19</td>
                <td>How can you treat ketamine addiction?</td>
              </tr>
              <tr valign="top">
                <td>20</td>
                <td>Does ketamine use cause cramps?</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
      <sec>
        <title>System Architecture</title>
        <p>As depicted in <xref rid="figure1" ref-type="fig">Figure 1</xref>, the user submits a query to be parsed by the information retrieval engine, which returns a ranked list of documents. The top n documents are chosen to be sources for answer generation. In the first layer, the LLM is provided with (1) a query, (2) text from the retrieved documents, and (3) a prompt that embeds the text and instructs the LLM to summarize it (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). Since the prompt context window is finite, feeding the LLM all the retrieved text for answer generation is typically impossible. Even single documents can be too long. Thus, the framework allows for the specification of segment lengths for the retrieved text in each iteration, so that the framework can be applied to relatively small LLMs with shorter context lengths. The first layer generates short, query-focused summaries (<xref rid="figure1" ref-type="fig">Figure 1</xref>). The LLM states if the retrieved text segment does not contain an answer to the question. Examples of this first-layer summarization are provided in <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>.</p>
        <p>The second layer takes as input the original query and individual short summaries embedded within a second prompt (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>) while ignoring summaries that the LLM states did not contain the answer. <xref rid="figure1" ref-type="fig">Figure 1</xref> depicts an example of the final, synthesized summary.</p>
        <p>We used the 8-bit quantized model Nous-Hermes-2-7B-DPO as our LLM, which is tuned on 1,000,000 high-quality instructions [<xref ref-type="bibr" rid="ref8">8</xref>]. To test the performance of the proposed framework with larger models, we also performed an evaluation using GPT-4 [<xref ref-type="bibr" rid="ref9">9</xref>].</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Overview of the two-layer RAG framework. The first layer generates individual summaries based on retrieved posts relevant to the original query. The second layer generates the final summary based on the individual summaries generated in the first layer. LLM: large language model; RAG: retrieval-augmented generation.</p>
          </caption>
          <graphic xlink:href="jmir_v27i1e66220_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>RAG Architecture</title>
        <p>Since the retrieval aspect is not our primary focus, we used a simple keyword-based approach using the default search settings provided by our information retrieval package “Whoosh,” which uses Okapi BM25F ranking [<xref ref-type="bibr" rid="ref10">10</xref>]. The top 50 retrieved documents were chosen for generating the first-layer summaries. This number may be adjusted without changes to the architecture. The number of text segments is typically higher when posts do not fit within the context window of the LLM after being embedded within the prompt.</p>
      </sec>
      <sec>
        <title>Evaluation</title>
        <p>Our evaluation focused on the architecture’s summary generation quality, rather than retrieval performance. Commonly used automatic summary evaluation methods, such as Recall-Oriented Understudy for Gisting Evaluation (ROUGE) [<xref ref-type="bibr" rid="ref11">11</xref>] and bilingual evaluation understudy (BLEU) [<xref ref-type="bibr" rid="ref12">12</xref>], primarily focus on text overlap between generated and gold-standard summaries. In the absence of gold-standard summaries, subject matter experts manually and qualitatively evaluated the important nuances of generative summaries, which is impossible with ROUGE or BLEU. We used Likert-scale evaluations (<xref ref-type="table" rid="table2">Table 2</xref>). Each query–individual summary–final summary triplet was evaluated by ≥2 evaluators (each holding at least a master’s degree in medicine, public health, informatics, or allied fields). Overall, 21 experts generated 76 evaluations for 20 unique queries.</p>
        <p>We also assessed the readability of the final summaries using the Coleman-Liau Readability Index (CLI) [<xref ref-type="bibr" rid="ref13">13</xref>], which approximates the US grade level required to comprehend text.</p>
        <p>We performed nonparametric tests for proportions (Mann-Whitney <italic>U</italic> test) with the null hypothesis (H<sub>0</sub>: “The two populations are equal”) to determine whether the scores assigned to answers generated by GPT-4 and Nous-Hermes-2-7B-DPO differed significantly. All tests were performed using the <italic>SciPy</italic> package [<xref ref-type="bibr" rid="ref14">14</xref>]. The null hypothesis (H<sub>0</sub>) was rejected if <italic>P</italic>&#60;.05 (two-tailed).</p>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Evaluation criteria and scales presented to annotators.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="250"/>
            <col width="250"/>
            <col width="500"/>
            <thead>
              <tr valign="top">
                <td>Criteria</td>
                <td>Question</td>
                <td>Evaluation scale</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Coverage</td>
                <td>Does the final summary accurately represent the information present in the original text?</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>5: Yes; the final summary covers all the important information present in the original text.</p>
                    </list-item>
                    <list-item>
                      <p>4: Mostly; the final summary covers most, but not all of the important information.</p>
                    </list-item>
                    <list-item>
                      <p>3: Somewhat; the final summary covers some of the important information, but also misses some of them.</p>
                    </list-item>
                    <list-item>
                      <p>2: Not really; the final summary misses most of the important information.</p>
                    </list-item>
                    <list-item>
                      <p>1: No; the final summary does not cover any of the important information present in the original text.</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>Coherence</td>
                <td>Is the final summary coherent?</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>5: Yes; the final summary is easy to read and understand.</p>
                    </list-item>
                    <list-item>
                      <p>4: Mostly; the final summary is readable, but not straightforward to understand.</p>
                    </list-item>
                    <list-item>
                      <p>3: Somewhat; the final summary is readable but confusing.</p>
                    </list-item>
                    <list-item>
                      <p>2: Not really; the final summary has some grammatical errors or non sequiturs.</p>
                    </list-item>
                    <list-item>
                      <p>1: No; the final summary is unintelligible or incomprehensible.</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>Relevance</td>
                <td>Does the final summary answer the original question?</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>3: Yes; the summary answers the original question.</p>
                    </list-item>
                    <list-item>
                      <p>2: Partially; the summary answers the original question, but not fully.</p>
                    </list-item>
                    <list-item>
                      <p>1: No; the summary does not answer the original question.</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>Length</td>
                <td>Is the length of the final summary appropriate?</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>3: Yes; the summary is appropriate in length.</p>
                    </list-item>
                    <list-item>
                      <p>2: Somewhat; the summary could be shorter or longer.</p>
                    </list-item>
                    <list-item>
                      <p>1: No; the summary is long-winded or too short.</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>Hallucination</td>
                <td>Does the summary contain information not present in the original text?</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>0: No; the summary does not contain information not present in the original text.</p>
                    </list-item>
                    <list-item>
                      <p>1: Yes; the summary contains information not present in the original text.</p>
                    </list-item>
                  </list>
                </td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
      <sec>
        <title>Ethical Considerations</title>
        <p>This study was deemed to be exempt from review per the Emory University Institutional Review Board’s guidelines. The data used in this study are anonymous by default. We ensured that self-disclosed, personally identifiable information is not used by only reporting aggregated data. We removed posts that were deleted by the user.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <p>We conducted extensive expert evaluations of the generated answers for coverage, coherence, relevance, length, and hallucination (<xref rid="figure2" ref-type="fig">Figure 2</xref>). Annotators were not made aware of which LLM was used to generate the summaries for fair evaluation. On a 5-point Likert scale, median coverage scores were 5 (IQR 4-5) for both models; the distributions did not differ significantly (<italic>U</italic>=733.0; <italic>n</italic><sub>1</sub>=37; <italic>n</italic><sub>2</sub>=39; <italic>P</italic>=.89). Median coherence scores were 5 (IQR 5-5 for GPT-4; IQR 4-5 for Nous-Hermes-2-7B-DPO) for both; they did not differ significantly (<italic>U</italic>=670.0; <italic>n</italic><sub>1</sub>=37; <italic>n</italic><sub>2</sub>=39; <italic>P</italic>=.49).</p>
      <p>On a 3-point Likert scale, the median relevance scores were 3 (IQR 3-3) for both; they did not differ significantly (<italic>U</italic>=662.0; <italic>n</italic><sub>1</sub>=37; <italic>n</italic><sub>2</sub>=39; <italic>P</italic>=.15). Median length scores were 3 (IQR 2-3) for both; they did not differ significantly (<italic>U</italic>=672.0; <italic>n</italic><sub>1</sub>=37; <italic>n</italic><sub>2</sub>=39; <italic>P</italic>=.55). On a binary scale, median hallucination scores were 0 (IQR 0-0) for both; they did not differ significantly (<italic>U</italic>=859.0; <italic>n</italic><sub>1</sub>=37; <italic>n</italic><sub>2</sub>=39; <italic>P</italic>=.10). The median CLIs were 16.635 (IQR 13.860-17.675) for GPT-4 and 12.125 (IQR 11.02-13.98) for Nous-Hermes-2-7B-DPO; there was a statistically significant difference (<italic>U</italic>=307.5; <italic>n</italic><sub>1</sub>=20; <italic>n</italic><sub>2</sub>=16; <italic>P</italic>&#60;.001).</p>
      <p>Median token counts for queries posed to GPT-4 and Nous-Hermes-2-7B-DPO were 5 (IQR 5-7) and 7 (IQR 5-8), respectively; there was no significant difference (<italic>U</italic>=165.0; <italic>n</italic><sub>1</sub>=20; <italic>n</italic><sub>2</sub>=16; <italic>P</italic>=.66). Median lengths of responses generated by GPT-4 and Nous-Hermes-2-7B-DPO were 1118 (IQR 709-2986) and 441 (IQR 231-695) for the combined individual summaries and 141.5 (IQR 115-159) and 61 (IQR 28-87) for the final summaries, respectively. Both were significantly different (<italic>U</italic>=300.0; <italic>n</italic><sub>1</sub>=20; <italic>n</italic><sub>2</sub>=16; <italic>P</italic>&#60;.001, and <italic>U</italic>=145.5; <italic>n</italic><sub>1</sub>=20; <italic>n</italic><sub>2</sub>=16; <italic>P</italic>&#60;.001).</p>
      <fig id="figure2" position="float">
        <label>Figure 2</label>
        <caption>
          <p>Box plots illustrating the distribution of scores for the evaluation criteria used. (A) Coverage on a 5-point Likert scale. (B) Coherence on a 5-point Likert scale. (C) Relevance on a 3-point Likert scale. (D) Length on a 3-point Likert scale. (E) Hallucination on a binary scale. (F) Values for the Coleman-Liau Index. (G) Token counts for questions. (H) Token counts for combined individual summaries. (I) Token counts for the final summary.</p>
        </caption>
        <graphic xlink:href="jmir_v27i1e66220_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
      </fig>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>This study presents a novel two-layer RAG framework for MQA that uses user-generated content from Reddit. Our findings demonstrate that the framework effectively synthesizes accurate and contextually relevant answers even in low-resource settings, aligning with our goal to create an accessible, computationally lightweight tool. Focusing on small, quantized, open-source LLMs ensures equitable access to valuable insights about emerging trends, potential side effects, and general perception of substances, as reflected in Reddit posts.</p>
        <p>The modular structure of the framework enables good performance without requiring specialized hardware, which is critical in low-resource environments. This modularity also supports using different retrieval engines or LLMs, providing flexibility to adapt to various use cases. The system’s ability to answer nuanced queries (eg, “What are k cramps like?”—which would require extensive manual curation) illustrates its potential for real-world applications. The framework’s ability to specify temporal ranges in queries allows it to track trends over time, offering opportunities for longitudinal studies and misinformation detection.</p>
        <p>Unlike previous work [<xref ref-type="bibr" rid="ref15">15</xref>], where segments of text are generated chronologically, we performed segmentation at the post level without accounting for chronology. Compared with existing literature, which predominantly relies on large LLMs for MQA [<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref16">16</xref>], this study underscores the potential of smaller models for tasks requiring domain-specific, contextually accurate outputs. Prior work often focuses on high-resource settings [<xref ref-type="bibr" rid="ref17">17</xref>,<xref ref-type="bibr" rid="ref18">18</xref>] with robust computational infrastructure, leaving gaps in applicability for low-resource environments. We fill this gap by showing that reliable performance can be achieved with computationally efficient architectures, expanding the reach of artificial intelligence tools to underresourced regions. Although smaller LLMs have been used [<xref ref-type="bibr" rid="ref19">19</xref>], summarization from large volumes of text with aggregated information has not been addressed before.</p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>Despite its promising features, the framework has limitations. It relies on the accuracy and representativeness of the Reddit data it ingests. Reddit posts may include biases, inaccuracies, or misinformation that could influence the system’s output. While faithfully summarizing misinformation is valuable for transparency, users need to exercise caution in interpreting the results. Additionally, we evaluated the framework using a small set of queries pertaining to substance use; further validation is necessary to assess performance across diverse medical domains.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>This study demonstrates that a modular, lightweight RAG framework can effectively address complex MQA using social media data in low-resource settings. By enabling clinicians to rapidly extract insights about substance use trends and potential side effects from Reddit posts, the framework holds significant potential for improving public health.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Prompts used.</p>
        <media xlink:href="jmir_v27i1e66220_app1.docx" xlink:title="DOCX File , 12 KB"/>
      </supplementary-material>
      <supplementary-material id="app2">
        <label>Multimedia Appendix 2</label>
        <p>Sample first-layer individual summaries.</p>
        <media xlink:href="jmir_v27i1e66220_app2.docx" xlink:title="DOCX File , 14 KB"/>
      </supplementary-material>
      <supplementary-material id="app3">
        <label>Multimedia Appendix 3</label>
        <p>Final summaries generated by the framework for each of the 20 queries used for evaluation.</p>
        <media xlink:href="jmir_v27i1e66220_app3.docx" xlink:title="DOCX File , 30 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">BLEU</term>
          <def>
            <p>bilingual evaluation understudy</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">CLI</term>
          <def>
            <p>Coleman-Liau Index</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">LLM</term>
          <def>
            <p>large language model</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">MQA</term>
          <def>
            <p>medical question answering</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">RAG</term>
          <def>
            <p>retrieval-augmented generation</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">ROUGE</term>
          <def>
            <p>Recall-Oriented Understudy for Gisting Evaluation</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>Research reported in this publication was supported by the National Institute on Drug Abuse of the National Institutes of Health (NIH) under award R01DA057599. The content is solely the responsibility of the authors and does not necessarily represent the official views of the NIH.</p>
    </ack>
    <notes>
      <sec>
        <title>Data Availability</title>
        <p>All data used in this study were publicly available from Reddit at the time of data collection. The second-level summaries are available in <xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref>. The original posts and social media posts analyzed during this study are available from the corresponding author upon reasonable request and the completion of a data use agreement.</p>
      </sec>
    </notes>
    <fn-group>
      <fn fn-type="con">
        <p>SD and Y Ge led analysis, evaluation, visualization, and original draft preparation. Y Guo, SR, JH, J Powell, DW, SP, and SL contributed to the evaluation and manuscript preparation. SB, MR, RS, YX, SK, RC, NH, DM, RW, JL, A Spadaro, and J Perrone worked on the evaluation. A Sarker conceptualized the study, led model design and implementation, and supervised the project.</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ji</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Ishii</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Fung</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Towards mitigating LLM hallucination via self reflection</article-title>
          <year>2023</year>
          <conf-name>Findings of the Association for Computational Linguistics: EMNLP 2023</conf-name>
          <conf-date>December 6-10, 2023</conf-date>
          <conf-loc>Singapore, Singapore</conf-loc>
          <fpage>1827</fpage>
          <lpage>1843</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/2023.findings-emnlp.123/"/>
          </comment>
          <pub-id pub-id-type="doi">10.18653/v1/2023.findings-emnlp.123</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dhuliawala</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Komeili</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Raileanu</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Celikyilmaz</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Weston</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Chain-of-verification reduces hallucination in large language models</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online on September 20, 2023</comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://arxiv.org/abs/2309.11495"/>
          </comment>
          <pub-id pub-id-type="doi">10.18653/v1/2024.findings-acl.212</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ge</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Owens</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Galvez</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Gologorskaya</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Lai</surname>
              <given-names>JC</given-names>
            </name>
            <name name-style="western">
              <surname>Pletcher</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Lai</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Development of a liver disease-specific large language model chat interface using retrieval augmented generation</article-title>
          <source>medRxiv</source>
          <comment>Preprint posted online on November 11, 2023</comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/37986764"/>
          </comment>
          <pub-id pub-id-type="doi">10.1101/2023.11.10.23298364</pub-id>
          <pub-id pub-id-type="medline">37986764</pub-id>
          <pub-id pub-id-type="pii">2023.11.10.23298364</pub-id>
          <pub-id pub-id-type="pmcid">PMC10659484</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Xiong</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Jin</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Benchmarking retrieval-augmented generation for medicine</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online on February 20, 2024</comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://arxiv.org/abs/2402.13178"/>
          </comment>
          <pub-id pub-id-type="doi">10.18653/v1/2024.findings-acl.372</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>McNamara</surname>
              <given-names>SL</given-names>
            </name>
            <name name-style="western">
              <surname>Yi</surname>
              <given-names>PH</given-names>
            </name>
            <name name-style="western">
              <surname>Lotter</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>The clinician-AI interface: intended use and explainability in FDA-cleared AI devices for medical image interpretation</article-title>
          <source>NPJ Digital Med</source>
          <year>2024</year>
          <volume>7</volume>
          <issue>1</issue>
          <fpage>80</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41746-024-01080-1"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41746-024-01080-1</pub-id>
          <pub-id pub-id-type="medline">38531952</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41746-024-01080-1</pub-id>
          <pub-id pub-id-type="pmcid">PMC10966080</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ghosh</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Tyagi</surname>
              <given-names>U</given-names>
            </name>
            <name name-style="western">
              <surname>Kumar</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Manocha</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>BioAug: conditional generation based data augmentation for low-resource biomedical NER</article-title>
          <year>2023</year>
          <conf-name>SIGIR '23: The 46th International ACM SIGIR Conference on Research and Development in Information Retrieval</conf-name>
          <conf-date>July 27, 2023</conf-date>
          <conf-loc>Taipei, Taiwan</conf-loc>
          <pub-id pub-id-type="doi">10.1145/3539618.3591957</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Somani</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Balla</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Peng</surname>
              <given-names>AW</given-names>
            </name>
            <name name-style="western">
              <surname>Dudum</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Jain</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Nasir</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Maron</surname>
              <given-names>DJ</given-names>
            </name>
            <name name-style="western">
              <surname>Hernandez-Boussard</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Rodriguez</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Contemporary attitudes and beliefs on coronary artery calcium from social media using artificial intelligence</article-title>
          <source>NPJ Digital Med</source>
          <year>2024</year>
          <volume>7</volume>
          <issue>1</issue>
          <fpage>83</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41746-024-01077-w"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41746-024-01077-w</pub-id>
          <pub-id pub-id-type="medline">38555387</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41746-024-01077-w</pub-id>
          <pub-id pub-id-type="pmcid">PMC10981728</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <collab>Teknium</collab>
            <collab>theemozilla</collab>
            <collab>karan4d</collab>
            <collab>huemin_art</collab>
          </person-group>
          <article-title>Nous-Hermes-2-Mistral-7B-DPO</article-title>
          <source>Hugging Face</source>
          <access-date>2024-07-10</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://huggingface.co/NousResearch/Nous-Hermes-2-Mistral-7B-DPO">https://huggingface.co/NousResearch/Nous-Hermes-2-Mistral-7B-DPO</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Achiam</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Adler</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Agarwal</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Ahmad</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Akkaya</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Leoni Aleman</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Almeida</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Altenschmidt</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Altman</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Anadkat</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Avila</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Babuschkin</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Balaji</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Balcom</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Baltescu</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Bao</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>GPT-4 technical report</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online on March 15, 2023</comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://arxiv.org/abs/2303.08774"/>
          </comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.2303.08774</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chaput</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Whoosh 2.7.4 documentation</article-title>
          <source>Whoosh</source>
          <access-date>2024-07-10</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://whoosh.readthedocs.io/en/latest/">https://whoosh.readthedocs.io/en/latest/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>CY</given-names>
            </name>
          </person-group>
          <article-title>ROUGE: a package for automatic evaluation of summaries</article-title>
          <year>2004</year>
          <conf-name>Text Summarization Branches Out</conf-name>
          <conf-date>July 25-26, 2004</conf-date>
          <conf-loc>Barcelona, Spain</conf-loc>
          <fpage>74</fpage>
          <lpage>81</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/W04-1013.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Papineni</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Roukos</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Ward</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>WJ</given-names>
            </name>
          </person-group>
          <article-title>BLEU: a method for automatic evaluation of machine translation</article-title>
          <year>2002</year>
          <conf-name>Proceedings of the 40th Annual Meeting of the Association for Computational Linguistics</conf-name>
          <conf-date>July 7-12, 2002</conf-date>
          <conf-loc>Philadelphia, PA</conf-loc>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/P02-1040.pdf"/>
          </comment>
          <pub-id pub-id-type="doi">10.3115/1073083.1073135</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Coleman</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Liau</surname>
              <given-names>TL</given-names>
            </name>
          </person-group>
          <article-title>A computer readability formula designed for machine scoring</article-title>
          <source>J Appl Psychol</source>
          <year>1975</year>
          <volume>60</volume>
          <issue>2</issue>
          <fpage>283</fpage>
          <lpage>284</lpage>
          <pub-id pub-id-type="doi">10.1037/h0076540</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Virtanen</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Gommers</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Oliphant</surname>
              <given-names>TE</given-names>
            </name>
            <name name-style="western">
              <surname>Haberland</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Reddy</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Cournapeau</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Burovski</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Peterson</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Weckesser</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Bright</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>van der Walt</surname>
              <given-names>SJ</given-names>
            </name>
            <name name-style="western">
              <surname>Brett</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Wilson</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Millman</surname>
              <given-names>KJ</given-names>
            </name>
            <name name-style="western">
              <surname>Mayorov</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Nelson</surname>
              <given-names>ARJ</given-names>
            </name>
            <name name-style="western">
              <surname>Jones</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Kern</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Larson</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Carey</surname>
              <given-names>CJ</given-names>
            </name>
            <name name-style="western">
              <surname>Polat</surname>
              <given-names>İ</given-names>
            </name>
            <name name-style="western">
              <surname>Feng</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Moore</surname>
              <given-names>EW</given-names>
            </name>
            <name name-style="western">
              <surname>VanderPlas</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Laxalde</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Perktold</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Cimrman</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Henriksen</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Quintero</surname>
              <given-names>EA</given-names>
            </name>
            <name name-style="western">
              <surname>Harris</surname>
              <given-names>CR</given-names>
            </name>
            <name name-style="western">
              <surname>Archibald</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Ribeiro</surname>
              <given-names>AH</given-names>
            </name>
            <name name-style="western">
              <surname>Pedregosa</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>van Mulbregt</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>SciPy 1.0: fundamental algorithms for scientific computing in Python</article-title>
          <source>Nat Methods</source>
          <year>2020</year>
          <volume>17</volume>
          <issue>3</issue>
          <fpage>261</fpage>
          <lpage>272</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://air.unimi.it/handle/2434/848184"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41592-019-0686-2</pub-id>
          <pub-id pub-id-type="medline">32015543</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41592-019-0686-2</pub-id>
          <pub-id pub-id-type="pmcid">PMC7056644</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Subbiah</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>SS</given-names>
            </name>
            <name name-style="western">
              <surname>Chilton</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>McKeown</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Reading subtext: evaluating large language models on short story summarization with writers</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online on March 02, 2024</comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://arxiv.org/abs/2403.01061"/>
          </comment>
          <pub-id pub-id-type="doi">10.1162/tacl_a_00702</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hammane</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Ben-Bouazza</surname>
              <given-names>FE</given-names>
            </name>
            <name name-style="western">
              <surname>Fennan</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>SelfRewardRAG: enhancing medical reasoning with retrieval-augmented generation and self-evaluation in large language models</article-title>
          <year>2024</year>
          <conf-name>International Conference on Intelligent Systems and Computer Vision (ISCV)</conf-name>
          <conf-date>August 12, 2024</conf-date>
          <conf-loc>Fez, Morocco</conf-loc>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://ieeexplore.ieee.org/abstract/document/10620139/"/>
          </comment>
          <pub-id pub-id-type="doi">10.1109/iscv60512.2024.10620139</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Alkhalaf</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Yin</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Deng</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Applying generative AI with retrieval augmented generation to summarize and extract key clinical information from electronic health records</article-title>
          <source>J Biomed Inform</source>
          <year>2024</year>
          <volume>156</volume>
          <fpage>104662</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(24)00080-7"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2024.104662</pub-id>
          <pub-id pub-id-type="medline">38880236</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(24)00080-7</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Soong</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Sridhar</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Si</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Wagner</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Sá</surname>
              <given-names>ACC</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>CY</given-names>
            </name>
            <name name-style="western">
              <surname>Karagoz</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Guan</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Kumar</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Hamadeh</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Higgs</surname>
              <given-names>BW</given-names>
            </name>
          </person-group>
          <article-title>Improving accuracy of GPT-3/4 results on biomedical data using a retrieval-augmented language model</article-title>
          <source>PLOS Digital Health</source>
          <year>2024</year>
          <volume>3</volume>
          <issue>8</issue>
          <fpage>e0000568</fpage>
          <pub-id pub-id-type="doi">10.1371/journal.pdig.0000568</pub-id>
          <pub-id pub-id-type="medline">39167594</pub-id>
          <pub-id pub-id-type="pii">PDIG-D-23-00346</pub-id>
          <pub-id pub-id-type="pmcid">PMC11338460</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jeong</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Sohn</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Sung</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Kang</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Improving medical reasoning through retrieval and self-reflection with retrieval-augmented large language models</article-title>
          <source>Bioinformatics</source>
          <year>2024</year>
          <volume>40</volume>
          <issue>Suppl 1</issue>
          <fpage>i119</fpage>
          <lpage>i129</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/38940167"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/bioinformatics/btae238</pub-id>
          <pub-id pub-id-type="medline">38940167</pub-id>
          <pub-id pub-id-type="pii">7700892</pub-id>
          <pub-id pub-id-type="pmcid">PMC11211826</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
