<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id><journal-id journal-id-type="publisher-id">jmir</journal-id><journal-id journal-id-type="index">1</journal-id><journal-title>Journal of Medical Internet Research</journal-title><abbrev-journal-title>J Med Internet Res</abbrev-journal-title><issn pub-type="epub">1438-8871</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v27i1e64901</article-id><article-id pub-id-type="doi">10.2196/64901</article-id><article-categories><subj-group subj-group-type="heading"><subject>Original Paper</subject></subj-group></article-categories><title-group><article-title>A Multimodal Analysis of Online Information Foraging in Health-Related Topics Based on Stimulus-Engagement Alignment: Observational Feasibility Study</article-title></title-group><contrib-group><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Z&#x00F6;rg&#x0151;</surname><given-names>Szilvia</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Peters</surname><given-names>Gjalt-Jorn</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Jeney</surname><given-names>Anna</given-names></name><degrees>MA</degrees><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author"><name 
name-style="western"><surname>Kov&#x00E1;cs</surname><given-names>Szil&#x00E1;rd D&#x00E1;vid</given-names></name><degrees>DDS</degrees><xref ref-type="aff" rid="aff4">4</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Crutzen</surname><given-names>Rik</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib></contrib-group><aff id="aff1"><institution>Faculty of Health, Medicine and Life Sciences, Maastricht University</institution><addr-line>P.O. Box 616</addr-line><addr-line>Maastricht</addr-line><country>The Netherlands</country></aff><aff id="aff2"><institution>Faculty of Psychology, Open University of the Netherlands</institution><addr-line>Heerlen</addr-line><country>The Netherlands</country></aff><aff id="aff3"><institution>College of Social Sciences, Seoul National University</institution><addr-line>Seoul</addr-line><country>Republic of Korea</country></aff><aff id="aff4"><institution>Faculty of Medicine, Semmelweis University</institution><addr-line>Budapest</addr-line><country>Hungary</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Sarvestan</surname><given-names>Javad</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Harun</surname><given-names>Ahasan</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Pang</surname><given-names>Patrick Cheong-Iao</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Szilvia Z&#x00F6;rg&#x0151;, PhD, Faculty of Health, Medicine and Life Sciences, Maastricht University, P.O. 
Box 616, Maastricht, 6200 MD, The Netherlands, 31 308622466; <email>s.zorgo@maastrichtuniversity.nl</email></corresp></author-notes><pub-date pub-type="collection"><year>2025</year></pub-date><pub-date pub-type="epub"><day>14</day><month>7</month><year>2025</year></pub-date><volume>27</volume><elocation-id>e64901</elocation-id><history><date date-type="received"><day>30</day><month>07</month><year>2024</year></date><date date-type="rev-recd"><day>06</day><month>04</month><year>2025</year></date><date date-type="accepted"><day>07</day><month>04</month><year>2025</year></date></history><copyright-statement>&#x00A9; Szilvia Z&#x00F6;rg&#x0151;, Gjalt-Jorn Peters, Anna Jeney, Szil&#x00E1;rd D&#x00E1;vid Kov&#x00E1;cs, Rik Crutzen. Originally published in the Journal of Medical Internet Research (<ext-link ext-link-type="uri" xlink:href="https://www.jmir.org">https://www.jmir.org</ext-link>), 14.7.2025. </copyright-statement><copyright-year>2025</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research (ISSN 1438-8871), is properly cited. 
The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://www.jmir.org/">https://www.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://www.jmir.org/2025/1/e64901"/><abstract><sec><title>Background</title><p>The recent increase in online health information&#x2013;seeking has prompted extensive user appraisal of encountered content. Information consumption depends crucially on the quality of encountered information and the user&#x2019;s ability to evaluate it; yet, within the context of web-based, organic search behavior, few studies take into account both these aspects simultaneously.</p></sec><sec><title>Objective</title><p>We aimed to explore a method to bridge these two aspects and grant even consideration to both the stimulus (web page content) and the user (ability to appraise encountered content). We examined novices and experts in information retrieval and appraisal to demonstrate a novel approach to studying information foraging theory: stimulus-engagement alignment (SEA).</p></sec><sec sec-type="methods"><title>Methods</title><p>We sampled from experts and novices in information retrieval and assessment, asking participants to conduct a 10-minute search task with a specific information goal. We used an observational and a retrospective think-aloud protocol to collect data within the framework of an interview. Data from 3 streams (think-aloud, human-computer interaction, and screen content) were manually coded in the Reproducible Open Coding Kit standard and subsequently aligned and represented in a tabularized format with the R package {rock}. 
SEA scores were derived from designated code co-occurrences in specific segments of data within the stimulus data stream versus the think-aloud and human-computer interaction data streams.</p></sec><sec sec-type="results"><title>Results</title><p>SEA scores represented a meaningful comparison of what participants encountered and what they engaged with. Operationalizing codes as either &#x201C;present&#x201D; or &#x201C;absent&#x201D; in a particular data stream allowed us to inspect not only which credibility cues participants engaged with most frequently, but also whether participants noticed the absence of cues. Code co-occurrence frequencies could thus indicate case-, time-, and context-sensitive information appraisal that also takes into account the quality of information encountered.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>Using SEA allowed us to retain epistemic access to idiosyncratic manifestations of both stimuli and engagement. In addition, by using the same coding scheme and designated co-occurrences across participants, we were able to pinpoint trends within our sample and subsamples. 
We believe our approach offers a powerful analysis encompassing the breadth and depth of data, both on par with each other in the feat of understanding organic, web-based search behavior.</p></sec></abstract><kwd-group><kwd>methodology</kwd><kwd>information appraisal</kwd><kwd>multimodal data</kwd><kwd>data visualization</kwd><kwd>digital health literacy</kwd><kwd>information-seeking</kwd><kwd>credibility</kwd><kwd>health information</kwd><kwd>information foraging</kwd><kwd>information retrieval</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><sec id="s1-1"><title>Background</title><p>In recent years, there has been a marked increase in online health information&#x2013;seeking, with more than half of European Union citizens aged 16&#x2010;74 years reporting such behavior [<xref ref-type="bibr" rid="ref1">1</xref>] and global trends exhibiting similar tendencies [<xref ref-type="bibr" rid="ref2">2</xref>]. Web-based information-seeking merits particular attention, as the current information ecosystem is riddled with contradictions and inaccuracies [<xref ref-type="bibr" rid="ref3">3</xref>]; many concerns have been raised about the quality of online health information encountered by users [<xref ref-type="bibr" rid="ref4">4</xref>]. In this information ecosystem, users must undertake an active role in evaluating the trustworthiness and accuracy of content [<xref ref-type="bibr" rid="ref4">4</xref>-<xref ref-type="bibr" rid="ref6">6</xref>], but studies have shown users demonstrating poor skills in this area [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref8">8</xref>]. 
An improved understanding of how user characteristics and mental models influence web-based behavior is needed for interventions aimed at improving the retrieval of high-quality information.</p><p>Digital health literacy (DHL)&#x2014;the ability to find, understand, and evaluate health information in digital environments and apply it to health behavior [<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref10">10</xref>]&#x2014;is a core competence for navigating online information and making decisions [<xref ref-type="bibr" rid="ref2">2</xref>], which is associated with general health as well as several of its determinants [<xref ref-type="bibr" rid="ref11">11</xref>]. Higher levels of DHL have been found to correlate with better health outcomes [<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref12">12</xref>], desirable health behaviors (eg, prevention and management of chronic disease) [<xref ref-type="bibr" rid="ref10">10</xref>], and a well-functioning patient-physician relationship [<xref ref-type="bibr" rid="ref2">2</xref>]. Vulnerability to low DHL is most common among populations with higher age, lower socioeconomic position, underserved neighborhoods, and degraded physical environments [<xref ref-type="bibr" rid="ref3">3</xref>].</p><p>DHL is commonly conceptualized as a formative construct consisting of four subconstructs: ability to (1) access or obtain (find), (2) understand, (3) appraise (evaluate), and (4) apply information relevant to health [<xref ref-type="bibr" rid="ref3">3</xref>]. Past studies on DHL have primarily measured the respondent&#x2019;s own assessment of their health literacy as applicable to digital contexts, which may aid in obtaining estimates of an individual&#x2019;s confidence in their own skills. This self-assessment requires considerable metacognitive skills, yet often fails to provide valid measurements of the target construct [<xref ref-type="bibr" rid="ref13">13</xref>]. 
Other studies have observed users using their DHL in online environments, but these have been limited to mock sites [<xref ref-type="bibr" rid="ref14">14</xref>], confined search spaces [<xref ref-type="bibr" rid="ref15">15</xref>-<xref ref-type="bibr" rid="ref17">17</xref>], and outcome measures that do not yield much information on the drivers of behavior (eg, search time, queries, clickstream data [<xref ref-type="bibr" rid="ref18">18</xref>]). Hence, such studies capitalize on a participant&#x2019;s ability to retrieve specific information and the usability of a particular website, but do not lend thorough insight into organic searches, that is, nondirected browsing behavior during ill-structured tasks (ie, problems that are not clearly defined and that have multiple solution paths [<xref ref-type="bibr" rid="ref19">19</xref>]).</p><p>Many scholars argue for the primacy of the subconstruct of appraisal (or evaluation) within DHL. The evaluation of the quality of encountered content often focuses on the credibility or trustworthiness of information, which is understood as perspectival to the user and not an innate feature of information. Yet, credibility is commonly thought to be influenced by cues that surround the presentation of information [<xref ref-type="bibr" rid="ref20">20</xref>]. Several studies aim to create lists of such credibility cues (relatively stable structural web page features, eg, contact information, links to external websites [<xref ref-type="bibr" rid="ref21">21</xref>]) from participant narratives, usually gathered with concurrent and retrospective think-alouds (TAs) during organic or restricted search activities [<xref ref-type="bibr" rid="ref8">8</xref>]. 
Other research has yielded credibility criteria checklists that aim to generalize credibility cues, such as HON scores, JAMA Benchmarks, the EQIP Tool, and the DISCERN Tool [<xref ref-type="bibr" rid="ref22">22</xref>].</p><p>The existence of these two general approaches draws attention to the fact that consuming (obtaining, understanding, appraising, and using) good quality health-related information online depends crucially on (1) the quality of encountered information and (2) the user&#x2019;s ability to recognize the quality of encountered information. Yet, few studies take into account both these aspects of information consumption simultaneously.</p><p>The objective of our feasibility study is to explore a method to bridge these two aspects of health-related information consumption and grant even consideration to both the stimulus (web page content) and the user (ability to appraise encountered content) within the context of web-based, organic search behavior. We examined novices and experts in information retrieval and appraisal to demonstrate a novel approach, stimulus-engagement alignment (SEA), which we situate as an extension of information foraging theory.</p></sec><sec id="s1-2"><title>Information Foraging Theory</title><p>Information foraging (IF) guides research on how users navigate among sources of online information, as well as predicts their behavior [<xref ref-type="bibr" rid="ref23">23</xref>]. The theory stipulates that humans &#x201C;forage&#x201D; for information as other animals forage for food. An individual&#x2019;s information environment consists of a combination of (1) stimuli (verbal, visual, etc) that are perceptually accessible (eg, text in a book, a diagram on a website) and (2) opportunities for interaction that increase perceivable information (eg, by scrolling down on a web page or clicking on a hyperlink). 
Information environments are &#x201C;patchy&#x201D; because information is clustered in certain areas, for example, on bookshelves, in libraries, or on websites [<xref ref-type="bibr" rid="ref23">23</xref>,<xref ref-type="bibr" rid="ref24">24</xref>]. Individuals generally engage with their information environment to achieve an active information goal, to obtain declarative knowledge about the world.</p><p>IF states that a user&#x2019;s navigation in and among information patches depends on &#x201C;information scent&#x201D;; a high information scent leads to foraging behavior that aids the user in achieving an active information goal [<xref ref-type="bibr" rid="ref23">23</xref>]. Information scent is conceptualized as a property of the stimulus [<xref ref-type="bibr" rid="ref17">17</xref>]; users engage with environmental stimuli that are assumed to have &#x201C;the maximum expected utility&#x201D; in achieving an information goal [<xref ref-type="bibr" rid="ref23">23</xref>], that is, certain content has higher information scent compared to others. Some authors using IF acknowledge the user perspective in determining information scent conceptually, such as Nwagwu stating that it is &#x201C;the perceived value of a source of information based on cues such as links, abstracts, or summaries&#x201D; [<xref ref-type="bibr" rid="ref25">25</xref>], but the theoretical framework and computational models do not have a straightforward way of incorporating user appraisal.</p><p>In work thus far, chiefly by Pirolli et al [<xref ref-type="bibr" rid="ref23">23</xref>,<xref ref-type="bibr" rid="ref26">26</xref>], information scent has been computed with semantic similarity (proximity of words within documents in a corpus) with natural language processing techniques, such as latent semantic analysis and latent Dirichlet allocation. 
A chosen corpus and the derived word proximity scores form what is referred to as a &#x201C;scent database,&#x201D; which, in turn, serves as the basis of computing information scent. Typically, researchers use a large textual database (eg, a corpus containing thousands of news articles) and, since proximity scores are derived from associations between words in the task description and textual environmental stimuli, they are considered uniform across users who share an information goal. Associations between content and goal can exhibit different strengths: a strong association is interpreted as high relevance to the goal. Association strengths among words in human memory are assumed to be related to the probabilities of word occurrences and co-occurrences. Due to the fact that this understanding of information scent relies on word-based, standardized associations, it does not easily accommodate varying user representations of utility or meaning. This operationalization of information scent also fails to take into account the correspondence between encountered stimuli and their appraisal, as users in IF studies are generally not asked to evaluate or interpret the content they encounter.</p></sec><sec id="s1-3"><title>Proposed Modifications to IF Theory</title><sec id="s1-3-1"><title>Modifying the Concept of Information Scent</title><p>We define information scent as a person&#x2019;s representation of the extent to which investing in a given patch of their information environment will aid progression toward a given information goal; thus, information scent is constituted by both the user and the stimuli in the information environment. Patches in themselves cannot have information scent: instead, they have attributes that contribute to information scent, but with varying degrees chiefly depending on the users&#x2019; extant internal representations. 
Hence, information scent can vary in the same person over time and context, depending on which information goals are active at any given time and the knowledge they have accumulated over a task. One implication of this definition is that, as opposed to previous studies (eg, [<xref ref-type="bibr" rid="ref26">26</xref>]), different users can have different information scent values for the same patch; in addition, as a person progresses toward their information goal, the information scent of a given patch can increase or decrease.</p></sec><sec id="s1-3-2"><title>Replacing Semantic Similarity With Code Co-occurrences Computed With SEA</title><p>We used code co-occurrences instead of semantic similarity to compute information scent. Codes are labels attached to data fragments that denote the observation that that fragment expresses a construct of interest (in our case, credibility cues). Owing to their conceptual nature, we assume that codes are more adequate at capturing meaning than individual words alone, and therefore they may be used to create more accurate models of the data. Furthermore, codes can be applied to nontextual credibility cues as well, hence opening up possibilities to move beyond the study of textual cues, which have characterized relevant research in this area thus far [<xref ref-type="bibr" rid="ref25">25</xref>]. We define code co-occurrence as the state where two codes are applied within the same patch. Our scent database is constituted by the co-occurrence of particular, deductively applied codes within designated segments of data, and these code co-occurrences are computed with SEA. 
In this process, we compare the presence of code pairs in lines of data representing encountered online content and user engagement, and control for the number of visited patches to arrive at SEA values ranging from 0 to 1.</p><p>To examine the feasibility of using SEA in studying the proposed modifications to IF theory, we explore information scent (SEA values) for two groups (novices and experts of information retrieval and assessment) in their performance of organic, web-based information-seeking regarding a health-related subject.</p></sec></sec></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Ethical Considerations</title><p>Ethics approval was gained from the University of Wisconsin-Madison Institutional Review Board. The study (2022&#x2010;0588) was determined to meet the criteria for exempt human subjects research as defined under 45 CFR 46:(3)(i)(B). Written informed consent to participate in the study and make their anonymized data public was obtained from participants before study participation. In accordance with open science principles, our entire project (eg, preregistration, recruitment materials, data collection instruments, codebook, analyses), including our data, is publicly accessible in our Open Science Framework repository [<xref ref-type="bibr" rid="ref27">27</xref>]. Participants were compensated for their time with a US $30 Amazon gift card.</p></sec><sec id="s2-2"><title>Sampling Considerations</title><p>We sampled from two populations of internet users in our study. The first population consists of experts in information retrieval and assessment: individuals who have educational or work experience as librarians, journalists, or related professions (the expert subsample). The second population includes individuals with different backgrounds (the novice subsample). 
Our general sampling strategy was purposive, and we aimed for homogeneity within and across subsamples with respect to the following criteria (references support the criterion&#x2019;s relevance in literature affecting computer skills and information literacy): geographical region [<xref ref-type="bibr" rid="ref28">28</xref>] (limited to Wisconsin, United States) and age [<xref ref-type="bibr" rid="ref29">29</xref>] (chosen range: 18&#x2010;39 years). In addition, because knowledge of Chinese language or culture could confer a biasing advantage in the search task, we aimed to exclude participants with such knowledge (ie, knowledge of Mandarin and Chinese ethnicity were exclusion criteria). We included 10 participants in each subsample (N=20), which was (1) adequate to test the feasibility of our framework and tool and (2) feasible, as we used manual coding for all data types and had to plan realistically in terms of what we were capable of curating and coding within our time frame.</p></sec><sec id="s2-3"><title>Data Collection</title><p>The expert subsample was recruited via email from the University of Wisconsin-Madison Information School, as well as from the following departments: (1) Library and Information Studies and (2) Journalism. The novice subsample was recruited via a university mailing list. We used observational and TA protocols to collect data on information foraging within the framework of an interview. 
Our observational protocol contained the specifics of observing online foraging in a 10-minute search task with the information goal of learning about various COVID-19 origin theories; the task description is displayed in <xref ref-type="other" rid="box1">Textbox 1</xref>.</p><boxed-text id="box1"><title> Description for online search task.</title><p>&#x201C;Some people think that the SARS-CoV-2 virus, which causes the disease COVID-19, originated in a laboratory in Wuhan, while others think that the first infection was caused by the virus being transmitted from a bat to a person. There are many theories on where and how the virus originated. Do some research on this topic online and, after 10 minutes, I will ask you some questions about the origin of the virus.&#x201D;</p></boxed-text><p>The retrospective TA protocol standardized how self-reflection on behavior was elicited following the task, namely, questions to pose when the participant enters a web page, leaves a web page, or engages with content. Questions probed general impressions and indicators of trustworthiness for all visited web pages, as well as why participants engaged and disengaged with them. Both protocols were conducted online via a videoconferencing platform. Additionally, we used a pretask survey to collect basic sociodemographic data (detailed in the next section). Thus, the data collection process yielded survey data, as well as one video (10-minute task) and one audio-video recording (retrospective TA) per participant.</p></sec><sec id="s2-4"><title>Specifying Case and Patch Attributes</title><sec id="s2-4-1"><title>Case Attributes</title><p>Responses were downloaded from our survey platform in a CSV file; case attributes were specified in YAML, a human- and machine-readable data serialization language. 
The following variables constituted our case attributes: caseID, groupID (subsample), sex, gender, ethnicity, race, nationality, level of education, political affiliation (see the codebook in our repository for more on these variables). In this study, we used caseIDs and groupIDs for analytical purposes; other variables only constituted part of the sample description.</p></sec><sec id="s2-4-2"><title>Patch Attributes</title><p>We defined patches as physical or virtual environments where produced and/or curated information can be stored and potentially accessed by users; we operationalized patches as any type of file that a browser can render when a user visits a particular URL. This included search engines, stand-alone PDFs, and pictures viewed on a separate page. <xref ref-type="table" rid="table1">Table 1</xref> contains patch attributes and their descriptions, which were logged in the plain text files constituting the screen content data stream and represented in YAML, as shown in <xref ref-type="other" rid="box2">Textbox 2</xref>.</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Examples of patch attributes and how they were represented in YAML.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Attribute</td><td align="left" valign="bottom">Description</td><td align="left" valign="bottom">Examples</td></tr></thead><tbody><tr><td align="left" valign="top">Patch tag</td><td align="left" valign="top">Nonunique label assigned to describe main website content and generator of content</td><td align="left" valign="top">PubMed, CNBC news</td></tr><tr><td align="left" valign="top">Domain</td><td align="left" valign="top">Second and top-level domain (and subdomain, where applicable)</td><td align="left" valign="top">google.com, ncbi.nlm.nih.gov</td></tr><tr><td align="left" valign="top">Patch identifier</td><td align="left" valign="top">Contains case identifier and the ordinal numbering of 
patches within a case&#x2019;s search</td><td align="left" valign="top">patch_001_1, patch_002_15</td></tr><tr><td align="left" valign="top">Patch type</td><td align="left" valign="top">Categorical value indicating general type/function of website</td><td align="left" valign="top">Engine, SERP<sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup>, page, file</td></tr><tr><td align="left" valign="top">Start time</td><td align="left" valign="top">Start of activity on patch (marked by change in URL)</td><td align="left" valign="top">&#x201C;00:02&#x201D;</td></tr><tr><td align="left" valign="top">End time</td><td align="left" valign="top">End of activity on patch (marked by leaving patch, ie, closing the tab/window or moving to another one or change in URL)</td><td align="left" valign="top">&#x201C;00:15&#x201D;</td></tr></tbody></table><table-wrap-foot><fn id="table1fn1"><p><sup>a</sup>SERP: search engine results page.</p></fn></table-wrap-foot></table-wrap><boxed-text id="box2"><title> Attributes in YAML.</title><p>&#x2013; &#x2013; &#x2013;</p><p>ROCK_attributes:</p><p>&#x2013;</p><p><named-content content-type="indent">&#x2003;</named-content>patchTag: "UW-Madison Libraries"</p><p><named-content content-type="indent">&#x2003;</named-content>domain: "library.wisc.edu"</p><p><named-content content-type="indent">&#x2003;</named-content>pid: "patch_006_1"</p><p><named-content content-type="indent">&#x2003;</named-content>patchType: "engine/SERP"</p><p><named-content content-type="indent">&#x2003;</named-content>start: "00:00"</p><p><named-content content-type="indent">&#x2003;</named-content>end: "00:03"</p><p>&#x2013;</p><p/><p><named-content content-type="indent">&#x2003;</named-content>patchTag: "Google All"</p><p><named-content content-type="indent">&#x2003;</named-content>domain: "google.com"</p><p><named-content content-type="indent">&#x2003;</named-content>pid: "patch_006_2"</p><p><named-content content-type="indent">&#x2003;</named-content>patchType: 
"engine/SERP"</p><p><named-content content-type="indent">&#x2003;</named-content>start: "00:04"</p><p><named-content content-type="indent">&#x2003;</named-content>end: "00:08"</p><p>&#x2013; &#x2013; &#x2013;</p></boxed-text><p>Patch tags were fully inductive, based on the URL and/or page title. Patch attributes &#x201C;start&#x201D; and &#x201C;end,&#x201D; indicating when a participant entered and left a patch, were transformed into the variable &#x201C;totalTime&#x201D; containing the number of seconds spent on a patch; for more on data transformations, see the &#x201C;Data_Transformations&#x201D; directory within our repository. For a full description of patch attributes, see our codebook (available at [<xref ref-type="bibr" rid="ref30">30</xref>]).</p><p>Although technically an attribute of patches, the type of content they exhibited was specified as &#x201C;states&#x201D; to enable exploring transitions between patches (for more information, see the &#x201C;Analyses and Modeling&#x201D; section). 
Patch content codes were developed inductively by 2 coders independently reviewing the entire dataset, triangulating their tentative categories in one round, resolving differences via social moderation, and finalizing the 6 categories contained in <xref ref-type="table" rid="table2">Table 2</xref>.</p><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Codebook containing patch content type labels, definitions, and examples.</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Code label</td><td align="left" valign="bottom">Definition</td><td align="left" valign="bottom">Examples</td></tr></thead><tbody><tr><td align="left" valign="top">Academia</td><td align="left" valign="top">Websites of scientific institutes with academically approved content</td><td align="left" valign="top">Wiley, University of Illinois, National Library of Medicine, PubMed</td></tr><tr><td align="left" valign="top">Government</td><td align="left" valign="top">Websites of governmental organizations and other authorities, both national and international</td><td align="left" valign="top">Centers for Disease Control and Prevention, US Department of Defense, World Health Organization</td></tr><tr><td align="left" valign="top">News</td><td align="left" valign="top">News sites with content often created by journalists targeting the general public</td><td align="left" valign="top">CNN, <italic>The New York Times</italic>, Al Jazeera</td></tr><tr><td align="left" valign="top">Science or health communication</td><td align="left" valign="top">Science- or medicine-related websites, with no academic affiliation</td><td align="left" valign="top">Wikipedia, WebMD, Nature</td></tr><tr><td align="left" valign="top">Other</td><td align="left" valign="top">Miscellaneous websites (eg, social media sites, nongovernmental organization websites)</td><td align="left" valign="top">Reddit, Zoom, Bat Conservation Trust</td></tr><tr><td 
align="left" valign="top">Engine/SERP<sup><xref ref-type="table-fn" rid="table2fn1">a</xref></sup></td><td align="left" valign="top">Search engines and SERP</td><td align="left" valign="top">Google, Google Scholar, Illinois Library</td></tr></tbody></table><table-wrap-foot><fn id="table2fn1"><p><sup>a</sup>SERP: search engine results page.</p></fn></table-wrap-foot></table-wrap></sec></sec><sec id="s2-5"><title>Data Preparation and Coding</title><sec id="s2-5-1"><title>Overview</title><p>Three multimodal data streams needed to be wrangled and synchronized: human-computer interactions (HCI) and screen content from the task completion video, as well as narratives from the TA video. We aimed to represent coded data in a single, tabularized dataset for further processing. Our iterative code development process is documented in detail in our repository under &#x201C;Operationalization\Code_development.&#x201D;</p></sec><sec id="s2-5-2"><title>Data Stream: Think-Aloud</title><p>Audio data from the TAs were transcribed in an automated process, manually corrected for accuracy, anonymized, and placed into separate plain text files per data provider. Using the R package {rock} [<xref ref-type="bibr" rid="ref31">31</xref>], we segmented the data [<xref ref-type="bibr" rid="ref32">32</xref>] by sentence (a newline character was added after punctuation marks) and assigned each one a unique &#x201C;utterance identifier.&#x201D; Coding was performed on this level of segmentation. Codes capturing (textual and nontextual) credibility cues were developed inductively from the entire dataset under the parent code Appraisal (APPR) and included patch features that contributed to participants&#x2019; assessment of content and creator. Note that in this study we only disclose codes that were used for SEA; for our full coding scheme, see [<xref ref-type="bibr" rid="ref33">33</xref>]. 
Our coding process involved several phases:</p><list list-type="order"><list-item><p>Free inductive coding performed autonomously by 2 raters.</p></list-item><list-item><p>Triangulation and creation of a tentative codebook.</p></list-item><list-item><p>Test coding performed autonomously by 2 raters on the same subset of data.</p></list-item><list-item><p>Triangulation and repetition of steps 2 and 3 for several iterations until a final codebook was developed.</p></list-item><list-item><p>Interrater reliability testing (using Cohen &#x03BA; [<xref ref-type="bibr" rid="ref34">34</xref>] as indicator) to confirm shared understanding and pinpoint discrepancies.</p></list-item><list-item><p>Triangulation and repetition of step 5 (until Cohen &#x03BA; &#x2265;0.90 was reached for all codes).</p></list-item><list-item><p>Deductive application of final coding scheme to full dataset.</p></list-item></list><p>Relying on the final codebook, one researcher coded the corpus manually with the Interface for the Reproducible Open Coding Kit (iROCK, available at [<xref ref-type="bibr" rid="ref35">35</xref>]). To ensure consistent application of codes, we computed intrarater agreement with Cohen &#x03BA; when 50% of the dataset was coded and when the last interview was finished; both instances yielded a &#x03BA; of at least 0.70 per code. 
<xref ref-type="table" rid="table3">Table 3</xref> displays a simplified version of TA codes within our codebook.</p><table-wrap id="t3" position="float"><label>Table 3.</label><caption><p>Codebook containing code labels, definitions, and examples for codes describing credibility cues (APPR).</p></caption><table id="table3" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Code label</td><td align="left" valign="bottom">Definition</td><td align="left" valign="bottom">Examples</td></tr></thead><tbody><tr><td align="left" valign="top">Date</td><td align="left" valign="top">Date content was created, modified, or updated</td><td align="left" valign="top">So it&#x2019;s from 2022.</td></tr><tr><td align="left" valign="top">Author</td><td align="left" valign="top">Author(s) names (full names or abbreviations, acronyms, pseudonyms)</td><td align="left" valign="top">It had multiple researchers on it too. So, like more eyes on, it could give it more credibility.</td></tr><tr><td align="left" valign="top">Author affiliation</td><td align="left" valign="top">Author affiliation, organization (via textual or visual designation, eg, seal or logo)</td><td align="left" valign="top">They&#x2019;ve put their seal on it [...] 
and they&#x2019;re willing to put it to their name, they&#x2019;re willing to defend it.</td></tr><tr><td align="left" valign="top">External references</td><td align="left" valign="top">Hyperlinks, citations, quotes with references, footnotes, bibliography</td><td align="left" valign="top">That&#x2019;s just a warning sign to me as, as trustworthy, like, this is a fact not cited.</td></tr><tr><td align="left" valign="top">DOI</td><td align="left" valign="top">Digital object identifier</td><td align="left" valign="top">There&#x2019;s the DOI.</td></tr><tr><td align="left" valign="top">Trusted URL</td><td align="left" valign="top">High-level domains, secure servers or padlock; implemented encryption</td><td align="left" valign="top">But I will admit that it looked like a more professional website than the CNN one in the sense that it had the .org.</td></tr><tr><td align="left" valign="top">Scientometrics</td><td align="left" valign="top">H-index, number of citations, Scimago quartile, impact factor</td><td align="left" valign="top">I was actually looking for the impact factor, doesn&#x2019;t tell me an impact factor.</td></tr><tr><td align="left" valign="top">Advertisements</td><td align="left" valign="top">Structural elements with commercial intent separate from the main content</td><td align="left" valign="top">It has a clean, ad-free homepage.</td></tr></tbody></table></table-wrap><p>Each of the 8 codes manifested as either a &#x201C;presence&#x201D; or an &#x201C;absence&#x201D; (eg, Date_pres or Date_abs), as both their presence and their absence were of interest in all 3 data streams, resulting in a total of 16 codes. 
The identifiers of all codes used in the TA data stream were prepended with the slug &#x201C;TA_.&#x201D;</p></sec><sec id="s2-5-3"><title>Data Stream: HCI</title><p>HCI observations were transcribed in a specific template as follows: &#x201C;Action | Content | Location.&#x201D; In this template, Action referred to physical engagement with the computer: type, click, scroll, hover, and highlight. Content indicated what the action referred to, what it was performed on, or a specification of the action (eg, the object selected, the characters typed in, or the direction of a scroll), while Location signified the place of action within the window (eg, search bar, page, hyperlink). For more details on this template and its code development, see [<xref ref-type="bibr" rid="ref33">33</xref>]. Transcribed HCI data were placed into separate plain text files per participant; codes were adopted from those developed for the TA data stream, their identifiers were prepended with the slug &#x201C;HCI_&#x201D; and applied with iROCK. HCI codes did not include a &#x201C;present&#x201D; versus an &#x201C;absent&#x201D; version, as the &#x201C;absence of HCI&#x201D; (eg, a user <italic>not</italic> interacting with a date) was not considered an element of HCI. Thus, HCI codes were constituted by those listed in <xref ref-type="table" rid="table3">Table 3</xref>.</p></sec><sec id="s2-5-4"><title>Data Stream: Screen Content</title><p>Screen content, that is, the content of web pages visited by the participant over the 10-minute search, was documented as a list of patch identifiers (pids). These identifiers were unique across cases (ie, no repetition of pids across participants), but nonunique within cases (ie, a participant could revisit a previous patch). The pids constituted the data in this data stream and were placed in separate plain text files per participant. 
Codes were adopted from those developed for the TA data stream; their identifiers were prepended with the slug &#x201C;SC_&#x201D; and applied with iROCK.</p></sec><sec id="s2-5-5"><title>Aligning Data Streams</title><p>To compare engagement (HCI; what participants did on a web page) and mental models (TAs; what participants said regarding a web page) with patch features (screen content), we aligned the 3 data streams by embedding anchors (text strings) after each patch in every source. We used the {rock} functionality &#x201C;Anchor-based Stream Synchronization&#x201D; to map codes from all streams onto a primary stream (TA data). <xref ref-type="fig" rid="figure1">Figure 1</xref> illustrates 3 streams of data for 2 patches, with source, stream, case, patch, utterance, code, and state identifiers, as well as anchors specified according to the Reproducible Open Coding Kit standard.</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>Three data streams with data (gray), source/stream/case identifiers (black), patch identifiers (red), utterance identifiers (gray bold), code identifiers (green), and state identifiers (blue), as well as anchors (black bold). HCI: human-computer interactions.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="jmir_v27i1e64901_fig01.png"/></fig></sec></sec><sec id="s2-6"><title>Tabularization and Transformation</title><p>We parsed the coded and aligned data streams with the {rock} package, creating an R object that contained all data from all participants in tabular form, as well as case and patch attributes. 
Rows in this dataset were constituted by data segmented and aligned according to anchors (delimiting patches); columns contained data from the 3 data streams, codes (in binary form: 1 if a code was present, 0 if it was absent in the line), state identifiers, and case identifiers, as well as case and patch attributes.</p><p>To prepare for SEA computation (comparing code co-occurrences in the screen content data stream versus the other two streams), we created a new variable that combined the codes in the TA and HCI streams. If a code was present in either or both of these, the derived variable (TA-HCI) value was 1; if it was absent in both, then the value was 0. This procedure was performed separately for all codes per patch. Subsequently, we generated SEA scores per patch by comparing the TA-HCI values with those in the screen content stream. <xref ref-type="table" rid="table4">Table 4</xref> contains the code &#x201C;date present&#x201D; in 3 data streams for 5 patches (the HCI code has no present/absent versioning), as well as the derived variable TA-HCI and the SEA score.</p><table-wrap id="t4" position="float"><label>Table 4.</label><caption><p>Code &#x201C;date present&#x201D; manifesting in 5 patches and 3 data streams, with the derived variable TA-HCI (TA-HCI_Date_pres column) and SEA value (SEA_Date_pres column), as well as a qualitative description of all possible co-occurrences (final column).<sup><xref ref-type="table-fn" rid="table4fn1">a</xref></sup><sup>,</sup><sup><xref ref-type="table-fn" rid="table4fn2">b</xref></sup></p></caption><table id="table4" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Patch</td><td align="left" valign="bottom">TA_Date_pres</td><td align="left" valign="bottom">HCI_Date</td><td align="left" valign="bottom">TA-HCI_Date_pres</td><td align="left" valign="bottom">SC_Date_pres<sup><xref ref-type="table-fn" rid="table4fn3">c</xref></sup></td><td align="left" valign="bottom">SEA_Date_pres</td><td 
align="left" valign="bottom">Qualitative description</td></tr></thead><tbody><tr><td align="left" valign="top">1</td><td align="left" valign="top">1</td><td align="left" valign="top">0</td><td align="left" valign="top">1</td><td align="left" valign="top">1</td><td align="left" valign="top">1</td><td align="left" valign="top">Credibility cue was present in stimulus and the participant engaged with it (in think-aloud)</td></tr><tr><td align="left" valign="top">2</td><td align="left" valign="top">0</td><td align="left" valign="top">1</td><td align="left" valign="top">1</td><td align="left" valign="top">1</td><td align="left" valign="top">1</td><td align="left" valign="top">Credibility cue was present in stimulus and the participant engaged with it (in HCI)</td></tr><tr><td align="left" valign="top">3</td><td align="left" valign="top">0</td><td align="left" valign="top">0</td><td align="left" valign="top">0</td><td align="left" valign="top">1</td><td align="left" valign="top">0</td><td align="left" valign="top">Credibility cue was present in stimulus but the participant did not engage with it</td></tr><tr><td align="left" valign="top">4</td><td align="left" valign="top">1</td><td align="left" valign="top">0</td><td align="left" valign="top">1</td><td align="left" valign="top">0</td><td align="left" valign="top">0</td><td align="left" valign="top">Participant engaged with a credibility cue (in think-aloud) that was not present in the stimulus</td></tr><tr><td align="left" valign="top">5</td><td align="left" valign="top">0</td><td align="left" valign="top">0</td><td align="left" valign="top">0</td><td align="left" valign="top">0</td><td align="left" valign="top">0</td><td align="left" valign="top">Credibility cue was not present in the stimulus and the participant did not engage with it</td></tr></tbody></table><table-wrap-foot><fn id="table4fn1"><p><sup>a</sup>SEA: stimulus-engagement alignment.</p></fn><fn id="table4fn2"><p><sup>b</sup>TA-HCI: think-aloud 
human-computer interactions.</p></fn><fn id="table4fn3"><p><sup>c</sup>SC: screen content.</p></fn></table-wrap-foot></table-wrap><p>Our scent database consisted of relevant co-occurrences for all 16 codes (present and absent versions of the 8 codes disclosed in <xref ref-type="table" rid="table3">Table 3</xref>) applied to the 3 data streams. Thus, for our sample, we obtained a total of 16 SEA values, per visited patch, for each participant&#x2019;s 10-minute search.</p></sec><sec id="s2-7"><title>Analyses and Modeling</title><sec id="s2-7-1"><title>Descriptive Analyses</title><p>To illustrate the presence and absence of credibility cues in the patches visited by experts and novices, we created histograms indicating the frequency of codes in the screen content data stream of each participant (16 codes total, maximum 1 occurrence per patch). We also computed the likelihood of a participant transitioning from one patch content type to another (designated as &#x201C;states&#x201D;; see <xref ref-type="fig" rid="figure1">Figure 1</xref>) with Qualitative/Unified Exploration of State Transitions (QUEST). As part of {rock} functionality, QUEST generated Markovian models of transitions between patch content types based on a state transition network where frequencies of transitions from a state to itself and other states constituted the total transition counts for each state. QUEST models were produced from our tabularized dataset with transition counts and probabilities per participant, rounded to 2 decimals.</p></sec><sec id="s2-7-2"><title>SEA Analysis</title><p>SEA scores only pertained to patches that were not search engines or search engine results pages (SERP), as these patches exhibited incomparably different credibility cues than other, substantive web pages. 
SEA scores were generated by computing the proportion of 1s in each column (for each of the 16 codes) per participant, thereby controlling for the varying numbers of patch visits during the 10-minute search.</p></sec></sec></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title>Sample Characteristics</title><p>Our sample consisted of experts (n=10; 6 males and 4 females) and novices (n=10; 5 males and 5 females) of information retrieval and appraisal. Nine of 10 experts and 2 of 10 novices had earned at least a bachelor&#x2019;s degree; all participants had completed secondary education. The expert group included participants originally from the United States (n=8), Nigeria (n=1), and Pakistan (n=1), whereas the novice group consisted of participants from the United States (n=4), the United Kingdom (n=2), France (n=1), Brazil (n=1), and Russia (n=1). All participants were residing in Madison, Wisconsin, at the time of data collection. Participants visited a total of 439 patches (patch types: page n=205, SERP n=152, engine n=64, file n=18) with 306 unique patch identifiers. On average, participants in the expert subsample visited 20.4 (SD 11.76) patches per 10-minute search, while novices visited 16.9 (SD 7.01). <xref ref-type="fig" rid="figure2">Figure 2</xref> displays the distribution of patch content types within the 10-minute participant searches.</p><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>Time spent on patch content types per case for both subsamples. 
SERP: search engine results pages.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="jmir_v27i1e64901_fig02.png"/></fig><p>Experts spent most of their 10-minute search on search engines and results pages, while novices spent most of their time on patches characterized as scicomm (duration in seconds, for experts and novices, respectively: engine/SERP=1566 vs 1054; academia=906 vs 1104; government=1199 vs 1011; news=812 vs 444; scicomm=1326 vs 1385; other=0 vs 398).</p></sec><sec id="s3-2"><title>Qualitative/Unified Exploration of State Transitions</title><p><xref ref-type="fig" rid="figure3">Figure 3</xref> displays the transition probabilities between patch content types (for definitions, see <xref ref-type="table" rid="table2">Table 2</xref>). When a particular patch type and/or transition did not appear in a source (not applicable), it was assigned a value of zero in the heat map.</p><fig position="float" id="figure3"><label>Figure 3.</label><caption><p>QUEST transition probabilities for patch content types per case for both subsamples. The codebook is provided in Table 2. QUEST: Qualitative/Unified Exploration of State Transitions; SERP: search engine results pages.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="jmir_v27i1e64901_fig03.png"/></fig><p>Of all possible transitions, participants in the expert subsample were most likely to transition from engine/SERP patches to those in the same category (self-loop), while novices were most likely to transition from patches labeled government or other, back to a search engine or results page. Experts usually selected to visit academic or government-curated patches from SERPs, while novices preferred government or scicomm patches. The highest transition probabilities among experts were from government, academic, and news sites back to search engines or results pages. 
Patches labeled other were only observed in the novice group (eg, Council on Foreign Relations, The New Reddit Journal of Science). Of applicable transitions, the expert subsample was least likely to transition from government patches to academic ones and to exhibit self-loops among news sites. Novices were least likely to transition from government patches to scicomm ones and to select patches labeled other from their search results.</p></sec><sec id="s3-3"><title>SEA Analysis</title><p>To understand how encountered content and its appraisal manifested regarding credibility cues, we used SEA analysis to compute alignment between content and its perceived credibility. <xref ref-type="fig" rid="figure4">Figure 4</xref> shows the alignment of stimulus and participant engagement codes (labels are capitalized) in our 3 data streams per patch, aggregated over the 10-minute search participants performed. In both subsamples, the strongest alignment occurred for external references present (experts n=6, &#x2211;=2.58; novices n=9, &#x2211;=2.81) and date present (experts n=5, &#x2211;=1.40; novices n=7, &#x2211;=1.56). The absence of credibility cues was, in general, disregarded by participants, with the exception of the absence of advertisements; one participant (expert 7) noted the absence of date multiple times as an indicator of questionable trustworthiness.</p><fig position="float" id="figure4"><label>Figure 4.</label><caption><p>SEA results showing the alignment of 16 codes (presence and absence of credibility cues) in the stimulus data stream with those in either human-computer interactions or think-aloud data for experts (1-10) and novices (11-20). The codebook is provided in Table 3. 
SEA: stimulus-engagement alignment.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="jmir_v27i1e64901_fig04.png"/></fig><p>Although the 2 subsamples averaged similar SEA scores for all codes (0.05), experts engaged with a total of 35 credibility cues, while novices engaged with 42. Experts tended to engage with fewer cues but with higher frequency, which may suggest that expertise in information retrieval and appraisal limits the focus of attention to certain cues (eg, external references, date, and trusted URL) at the expense of others.</p></sec><sec id="s3-4"><title>Comprehensive Comparison</title><p>In this section, we use QUEST (transition probabilities for patch content types), SEA (designated code co-occurrences between encountered credibility cues and engagement with them), code frequency histograms (from coded screen content data), and qualitative insight (via hermeneutics) to compare 4 cases. Cases 1 (female) and 10 (male) were in the expert subsample, while cases 13 (female) and 14 (male) were in the novice subsample. <xref ref-type="fig" rid="figure5">Figures 5</xref> and <xref ref-type="fig" rid="figure6">6</xref> show the QUEST models, SEA scores, and screen content code frequency histograms for 2 experts and 2 novices, respectively.</p><fig position="float" id="figure5"><label>Figure 5.</label><caption><p>Comparison of 2 experts based on QUEST (top left), code frequency histograms (bottom left), and SEA scores (right). The codebook is provided in Table 3. QUEST: Qualitative/Unified Exploration of State Transitions; SEA: stimulus-engagement alignment.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="jmir_v27i1e64901_fig05.png"/></fig><fig position="float" id="figure6"><label>Figure 6.</label><caption><p>Comparison of 2 novices based on QUEST (top left), code frequency histograms (bottom left), and SEA scores (right). 
The codebook is provided in Table 3. QUEST: Qualitative/Unified Exploration of State Transitions; SEA: stimulus-engagement alignment.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="jmir_v27i1e64901_fig06.png"/></fig><p>Expert 1 conducted a well-defined search with logical operators (eg, &#x201C;(&#x2018;COVID-19&#x2019; OR &#x2018;novel coronavirus&#x2019; AND &#x2018;Wuhan&#x2019; OR &#x2018;Origin&#x2019; AND (&#x2018;bat&#x2019;))&#x201D;) and based retrieval on scientometric considerations, such as: &#x201C;I would go to SJR to check the journal ranking [...] this journal has a long history of being in Q1.&#x201D; Congruent with their focus in retrieval, the participant appraised the trustworthiness of patches primarily based on scientometrics, while displaying limited engagement with other cues (eg, external references, author affiliation, date). The most common credibility cue present in the encountered stimuli was trusted URL, followed by scientometrics and the absence of advertisements. Expert 1 spent most of her time on academic patches and exhibited 20 state transitions among 14 unique web pages; she solely visited academic patches following searches (ie, engines were used to retrieve academic patches only) and was most likely to transition from patches with academic content back to that same patch content type. She was just as likely to use a search engine to find academic patches as to be prompted to search for information that she encountered on patches labeled academia. Thus, expert 1&#x2019;s retrieval and appraisal were all centered around scientometric indicators, and she exhibited an equal likelihood in transitioning from and to search engines and results pages.</p><p>In contrast, expert 10 did not visit any academic patches and was most likely to select scicomm patches from the search results page, which is the patch type he spent most of his time on. 
Expert 10 exhibited higher SEA scores for more credibility cues (n=6) compared to expert 1 (n=3), of which external references retained the highest frequency. The participant emphasized the primacy of references or citations in the text body during information appraisal: &#x201C;At this point, I was looking at the quotes that they were citing [...] So again, with Wikipedia, trusting the editorial process there, and also the multitude of hyperlinks to other articles that have also gone through that editorial process.&#x201D; Congruently, expert 10 visited a high number of patches exhibiting external references, but cues such as author, author affiliation, and date were more often absent than present in the stimuli. He exhibited 14 transitions among 13 unique web pages, with a strong self-loop in scicomm; he was likely to transition from this category to patches labeled government. From government-curated information, he was likely to transition to news sites and back to those labeled scicomm. Expert 10&#x2019;s QUEST diagram indicates that no searches were performed for new information on encountered patches. In general, expert 10 engaged with the presence of several credibility cues, but frequently encountered patches lacking in these cues (eg, DOI, author, author affiliation) and did not engage with those absences.</p><p>Novice 13 spent most of her search on government patches and engaged with 5 credibility cues during her 10-minute search, such as author affiliation and trusted URL. Nevertheless, she also expressed uncertainty in how these could apply to trustworthiness: &#x201C;Um, honestly, there&#x2019;s no real, real good way to know whether this could or couldn&#x2019;t be trusted.&#x201D; Credibility cues such as author, author affiliation, and date were often absent in the stimuli, but the participant did not engage with these absences (which would have resulted in higher SEA scores). 
Of her 20 transitions in total (12 unique web pages), novice 13 was most likely to transition from academic patches to government ones. After performing a search, she was most likely to click on a patch labeled government and those visits often ended with the participant searching for information she had found on the government site; this was true for patches labeled other as well.</p><p>Novice 14 spent most of his search on scicomm patches. He did not engage with any credibility cues apart from date, which meant disengaging from other cues, such as external references and trusted URL, even though most visited patches exhibited these. Many visited patches lacked DOI, author, and author affiliation, but the participant did not engage with these absences. Novice 14 exhibited 6 transitions, all unique web pages, and was most likely to visit government and scicomm patches following a search. Government self-loops exhibited a high probability, which could indicate a trust in government-generated information, but the participant&#x2019;s think-aloud data demonstrate otherwise: &#x201C;It&#x2019;s basically lying with statistics, you can always find somebody that will make the statistics say what you want. [...] You have to look at reality, reality itself, like, people used to think leaded gasoline was fine. And that&#x2019;s what the majority of scientists said. Scientists and doctors used to say that smoking was healthy for your child while you were pregnant.&#x201D; Thus, novice 14 visited relatively few patches exhibiting a fair number of credibility cues, but did not engage with those cues or with the absence of other cues. In addition, despite viewing mainly government-curated information, the participant expressed distrust concerning that information.</p></sec><sec id="s3-5"><title>Notes on Feasibility</title><p>We explored the possibility of granting even consideration to stimulus and user in modeling information consumption. 
The co-occurrence of designated codes in the stimulus versus the other 2 data streams represented a meaningful comparison of what the participant encountered and what they engaged with. Operationalizing codes as either &#x201C;present&#x201D; or &#x201C;absent&#x201D; in a particular data stream allowed us to inspect not only which credibility cues participants engaged with most frequently, but also whether participants noticed the absence of cues. This, in turn, served to differentiate stimuli exhibiting a high or low number of cues (measured via their presence and absence), reflective of the trustworthiness of their content. Code co-occurrence frequencies could thus indicate tailored information scent, which also takes into account the quality of information encountered. Retaining individual SEA scores (see <xref ref-type="fig" rid="figure4">Figure 4</xref>) allowed us to pinpoint participant idiosyncrasies concerning which credibility cues participants most frequently engaged with and whether the absence of those cues was cognized. 
Refraining from aggregation also enabled a more complex within-case exploration using different analyses to show the following:</p><list list-type="order"><list-item><p>The alignment between that which was encountered and engaged with (SEA analysis).</p></list-item><list-item><p>The quality of encountered stimuli (code frequencies).</p></list-item><list-item><p>The probability of transitioning between certain types of content (QUEST).</p></list-item><list-item><p>Qualitative insights into behaviors and mental models (hermeneutics).</p></list-item></list><p>SEA scores were the product of several methodological decisions, most notably code development (eg, from which data stream or streams codes were generated and in what way their definitions were coordinated across streams), code application (eg, choices as to whether instances of codes apply in the case of particular stimuli or acts of engagement), segmentation and alignment of data (eg, operationalizing a meaningful segment of data applicable across data streams), co-occurrence designation (ie, constructing a scent database from meaningful code co-occurrences), and co-occurrence computation (eg, mode of accumulation, weighting).</p></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Principal Findings</title><p>We tested a novel approach to studying information scent based on synchronized data from 3 qualitative data streams, comparing stimuli and user engagement via the novel SEA analysis. Using SEA allowed us to retain epistemic access to idiosyncratic manifestations of both stimuli and engagement and, through using the same coding scheme and scent database across participants, also allowed us to pinpoint trends within our sample and subsamples. 
Our comprehensive analysis involving SEA scores, code frequencies, QUEST, and hermeneutics afforded insights into how well credibility cues and their absences in stimuli aligned with participant engagement, what quality and types of content participants encountered, as well as their mental model of the stimuli and their own behavior. This exploration went further than merely indexing what web pages users encounter, or what their subjective insights on content trustworthiness may be, and offered a valid alternative to information scent that is case-, time-, and context-sensitive. We will now proceed to discuss implications intersecting this new method and IF theory.</p></sec><sec id="s4-2"><title>Bridging Stimulus and User</title><p>As a foundational premise of information theory, information is contingent on a sender and a receiver [<xref ref-type="bibr" rid="ref36">36</xref>], which we presently translated to stimulus (features of online content) and user (mental model estimated via engagement manifesting in narratives and HCI). Information scent has been defined according to the &#x201C;maximum expected utility&#x201D; of a patch in light of an information goal, referring &#x201C;to the local cues that users process in making such judgments&#x201D; [<xref ref-type="bibr" rid="ref26">26</xref>]. Although the information goal has a conceptual and operational role in IF-based models, such as SNIF-ACT (Scent-based Navigation and Information Foraging in the ACT architecture), the ability of a user to detect cues as well as their mental models of both the information goal and (features of the) patch are generally de-emphasized or lacking. To account for both sender and receiver, we proposed modifying the concept of information scent to include both stimulus and user, which allowed us to examine not only the presence or absence of credibility cues in visited content, but also whether or not the participant engaged with those. 
This innovation offers a feasible way to integrate user perception into IF models by creating a conceptual and computational role for both patch features and user mental models; more generally, it offers studies on DHL a way to compare the quality of online content with the user&#x2019;s ability to recognize credibility cues within that content.</p></sec><sec id="s4-3"><title>Curating the Scent Database</title><p>Traditionally, a scent database is constructed from semantic similarity within a large textual corpus, constraining researchers to token-based analyses (eg, [<xref ref-type="bibr" rid="ref37">37</xref>]). Using a corpus that is not tailored to the subject under scrutiny may entail relying on co-occurrences that are not meaningful (eg, merely lend information about the distribution of word forms) or are misleading in certain analyses (eg, represent and reproduce biases resulting from data, algorithms, and content presentation) [<xref ref-type="bibr" rid="ref38">38</xref>]. Depending on analytical objectives, codes that represent higher-level constructs (eg, concepts and symbols that require longer sequences to represent) may be more adequate in capturing both textual and nontextual properties of the data [<xref ref-type="bibr" rid="ref32">32</xref>]. We demonstrated how a scent database can be constituted by designated co-occurrences of codes in the data (stimuli and user engagement) that represent constructs of interest (credibility cues). These codes can be created inductively from the collected data, adopted from an existing coding scheme, or a combination of both. In our case, the inductively created codes showed almost complete overlap with those found in other studies [<xref ref-type="bibr" rid="ref8">8</xref>], except for our code DOI. 
Designated code co-occurrences may provide a valid means of computing information scent, as they can denote a more meaningful association than the co-occurrence of words.</p></sec><sec id="s4-4"><title>The Primacy of the Information Goal</title><p>Most IF-based studies that define information scent as a property of the stimulus focus on the user&#x2019;s ability to seek out content with high information scent (as per that definition) [<xref ref-type="bibr" rid="ref25">25</xref>], as information goals typically include locating specific pieces of information with high utility in confined search spaces (cf [<xref ref-type="bibr" rid="ref23">23</xref>]). Problem-solving literature often distinguishes between well- and ill-structured problems; while the former entails working with a finite number of concepts and rules in a constrained space with a well-defined initial and goal state, the latter connotes the integration of several content domains to reach solutions that are not predictable or convergent [<xref ref-type="bibr" rid="ref19">19</xref>]. An example of ill-structured problem-solving is sensemaking, which is the act of seeking, integrating, and interpreting information to support decision-making [<xref ref-type="bibr" rid="ref39">39</xref>]. When researching health-related information, sensemaking is often used to perform an organic search and process a variety of information, including untrusted or poor quality content. In these cases, finding the shortest path to good quality information is not as crucial as being able to correctly appraise information quality and integrate encountered content. 
For this reason, the stimulus-centric definition of information scent cannot be applied to models of sensemaking in a straightforward way, while SEA can potentially accommodate ill-structured problem-solving in nonrestricted search activities.</p></sec><sec id="s4-5"><title>Considering Discernment</title><p>With all primates, including humans, foraging for food is considered learned behavior [<xref ref-type="bibr" rid="ref40">40</xref>]. Analogous to food foraging decisions, such as discerning edible from nonedible or estimating nutritional value versus nutritional requirements, we assumed that the recognition of both the absence and presence of credibility cues together constitute appropriate appraisal of information trustworthiness. In this conceptualization, information scent can still be high, despite the participant visiting an untrusted patch; similarly, the same patch can retain different information scent values for various participants or even differ for the same user over time. We believe this definition of information scent accounts for and more accurately represents the diversity of information environments and user engagement. More specifically, during ill-structured problem-solving, participants may visit patches with varying numbers of credibility cues to triangulate previously encountered information. A web page with few credibility cues may indicate less trustworthy information, but if the user is aware of this caveat, information derived from these patches may still be valuable in sensemaking processes. SEA analysis, used together with qualitative insight, can provide a more accurate interpretation of why a user visited certain types of (trusted or untrusted) content and how they integrated information into their mental model. 
Using mixed methods to create a rich dataset containing both user actions and perceptions [<xref ref-type="bibr" rid="ref41">41</xref>] also sheds more light on the DHL dimensions of appraisal and understanding via being privy to participant idiosyncrasies in reasons for and implications of visiting patches with varying numbers of credibility cues. As our comprehensive analysis (QUEST, SEA, code frequencies, hermeneutics) showed, this allowed for complex within-case exploration.</p></sec><sec id="s4-6"><title>Alternatives in SEA Computation and Aggregation</title><p>Accounting for both stimulus and user, curating a scent database from meaningful co-occurrences, and aligning the concept of information scent to the (type of) information goal still leaves much methodological leeway in information scent computation. Although we &#x201C;dummy coded&#x201D; credibility cues in stimulus and engagement data, which served as the basis for SEA scores, weighted values could allow for a more refined representation of the presence and absence of credibility cues in stimuli and also for distinguishing between more or less vital cues in user engagement (eg, assigning more weight to author affiliation being present versus a DOI). Naturally, this would also entail justifying the rationale behind weighting choices. These methodological decisions depend crucially on study objectives and underlying epistemic assumptions, as does the aggregation of data across participants. In this study, SEA scores were summed and averaged per subsample to provide descriptive information and pinpoint general tendencies within subsamples.</p></sec><sec id="s4-7"><title>Limitations</title><p>Due to the Markovian model that was used, QUEST diagrams were only able to visualize pairwise transitions between patch content types, which implies that the full sequence of such patch types throughout the 10-minute search was not represented in the model. 
This connotes an understanding limited to the sequence and interaction of 2 patch types, rather than a complete string, which would have yielded unwieldy sequences hindering the identification of patterns within subsamples. The coding of the stimulus data stream, signifying a &#x201C;gold standard&#x201D; against which user engagement was contrasted, constituted a challenge, as not all codes were applicable to every patch. For example, while the date of publication can be expected to be present on all web pages with substantive content, a DOI may not be applicable to news reports or interviews. Further complicating this issue, some news sites provide a DOI to all published pieces, hence a DOI could be expected for those patches, but not necessarily for other news websites. These judgments of code applicability were made on a patch-by-patch basis by the coder responsible for coding the stimulus data stream; without a more systematic approach to determining code applicability, this connotes a threat to reliability. Furthermore, as with all such methodological decisions, the operationalization of patch and code co-occurrences constrained our analysis. Patches were considered unique across but not within search tasks; therefore, it was not possible to (1) identify which content was accessed by more than one participant and (2) determine how information scent changed over the duration of a search. This information was beyond the scope of this paper but could be obtained from the same data if coded differently. Additionally, although think-aloud and HCI data served as a valuable indicator of participants&#x2019; mental models of the information goal and content appraisal, engagement could also have been measured with other modalities, such as eye-tracking, which could provide further insight into user mental models.</p><p>Analytical gaze can and should be directed toward varying participant interpretations of the same cues. 
For example, even though 2 cases engage with scientometric indicators on a patch, the presence or absence of scientometrics, and the notion itself, may carry markedly different meaning for the 2 users. Additionally, as with any analysis based on qualitative coding, SEA scores are determined by coding and segmentation choices. These choices and their methodological implications are listed in the section &#x201C;Notes on Feasibility&#x201D; and have a profound implication for the interpretation of SEA results. Furthermore, as flattened data, SEA scores in themselves may be misleading (as mentioned in the &#x201C;Considering Discernment&#x201D; section) and require qualitative insight to formulate, validate, influence, or question their meaning. Finally, we would like to reiterate that this was a feasibility study; hence, it is likely that our substantive results reflect sampling or other error sources rather than regularities in how people process health information. As such, although our conclusions concerning feasibility are solid, these substantive findings should be considered tenuous.</p></sec><sec id="s4-8"><title>Conclusions</title><p>Developing and deploying SEA as a means for information scent computation enabled us to compare patch features with user engagement and to analyze user-specific information scent values. Aside from aggregating data across cases to detect subsample-specific tendencies, via using the combination of anchor-based stream synchronization, SEA, QUEST, code frequencies, and qualitative insight, we were able to obtain a refined within-case perspective as well. We believe these tools lend a powerful analysis encompassing the breadth and depth of data, both on par with each other in the feat of understanding web-based search behavior. 
Future studies could benefit from exploring weighting in SEA variables, enabling a refined ranking of credibility cues (reflecting real-world phenomena under scrutiny) and designated co-occurrences of more than 2 codes (enabling more complex associations among variables). SEA analyses can be extended beyond credibility cues to any construct of interest (eg, query specifications or the comparison of understanding and appraisal versus implementation), supporting the exploration of other DHL dimensions.</p></sec></sec></body><back><ack><p>This project received funding from the European Union&#x2019;s Horizon 2020 research and innovation program under the Marie Sk&#x0142;odowska-Curie grant agreement number 101028644, as well as from University Fund Limburg/SWOL. The opinions, findings, and conclusions do not reflect the views of the funding agency, cooperating institutions, or other individuals.</p></ack><notes><sec><title>Data Availability</title><p>All data for this feasibility study can be accessed via our repository at [<xref ref-type="bibr" rid="ref27">27</xref>].</p></sec></notes><fn-group><fn fn-type="con"><p>Conceptualization: SZ, RC</p><p>Data curation: SZ</p><p>Formal analysis: SZ, AJ, SDK</p><p>Investigation: SZ</p><p>Methodology: SZ, GJP, RC</p><p>Project administration: SZ, RC</p><p>Software: GJP</p><p>Supervision: RC</p><p>Visualization: GJP, RC</p><p>Writing &#x2013; original draft: SZ, AJ, SDK</p><p>Writing &#x2013; review &#x0026; editing: SZ, GJP, AJ, SDK, RC</p></fn><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">DHL</term><def><p>digital health literacy</p></def></def-item><def-item><term id="abb2">HCI</term><def><p>human-computer interactions</p></def></def-item><def-item><term id="abb3">IF</term><def><p>information foraging</p></def></def-item><def-item><term id="abb4">iROCK</term><def><p>Interface for the Reproducible Open Coding 
Kit</p></def></def-item><def-item><term id="abb5">pids</term><def><p>patch identifiers</p></def></def-item><def-item><term id="abb6">QUEST</term><def><p>Qualitative/Unified Exploration of State Transitions</p></def></def-item><def-item><term id="abb7">SEA</term><def><p>stimulus-engagement alignment</p></def></def-item><def-item><term id="abb8">SERP</term><def><p>search engine results pages</p></def></def-item><def-item><term id="abb9">SNIF-ACT</term><def><p>Scent-based Navigation and Information Foraging in the ACT architecture</p></def></def-item><def-item><term id="abb10">TA</term><def><p>think-aloud</p></def></def-item><def-item><term id="abb11">TA-HCI</term><def><p>think-aloud human-computer interactions</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="web"><article-title>Individuals using the internet for seeking health-related information</article-title><source>Eurostat</source><year>2023</year><access-date>2025-06-23</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://ec.europa.eu/eurostat/databrowser/view/tin00101/default/table?lang=en">https://ec.europa.eu/eurostat/databrowser/view/tin00101/default/table?lang=en</ext-link></comment></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kubb</surname><given-names>C</given-names> </name><name name-style="western"><surname>Foran</surname><given-names>HM</given-names> </name></person-group><article-title>Online health information seeking for self and child: an experimental study of parental symptom search</article-title><source>JMIR Pediatr Parent</source><year>2022</year><month>05</month><day>9</day><volume>5</volume><issue>2</issue><fpage>e29618</fpage><pub-id pub-id-type="doi">10.2196/29618</pub-id><pub-id pub-id-type="medline">35532970</pub-id></nlm-citation></ref><ref 
id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>van Kessel</surname><given-names>R</given-names> </name><name name-style="western"><surname>Wong</surname><given-names>BLH</given-names> </name><name name-style="western"><surname>Clemens</surname><given-names>T</given-names> </name><name name-style="western"><surname>Brand</surname><given-names>H</given-names> </name></person-group><article-title>Digital health literacy as a super determinant of health: more than simply the sum of its parts</article-title><source>Internet Interv</source><year>2022</year><month>03</month><volume>27</volume><fpage>100500</fpage><pub-id pub-id-type="doi">10.1016/j.invent.2022.100500</pub-id><pub-id pub-id-type="medline">35242586</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Diviani</surname><given-names>N</given-names> </name><name name-style="western"><surname>van den Putte</surname><given-names>B</given-names> </name><name name-style="western"><surname>Giani</surname><given-names>S</given-names> </name><name name-style="western"><surname>van Weert</surname><given-names>JC</given-names> </name></person-group><article-title>Low health literacy and evaluation of online health information: a systematic review of the literature</article-title><source>J Med Internet Res</source><year>2015</year><month>05</month><day>7</day><volume>17</volume><issue>5</issue><fpage>e112</fpage><pub-id pub-id-type="doi">10.2196/jmir.4018</pub-id><pub-id pub-id-type="medline">25953147</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Song</surname><given-names>S</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>Y</given-names> 
</name><name name-style="western"><surname>Yu</surname><given-names>B</given-names> </name></person-group><article-title>Interventions to support consumer evaluation of online health information credibility: a scoping review</article-title><source>Int J Med Inform</source><year>2021</year><month>01</month><volume>145</volume><fpage>104321</fpage><pub-id pub-id-type="doi">10.1016/j.ijmedinf.2020.104321</pub-id><pub-id pub-id-type="medline">33202372</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>McGrew</surname><given-names>S</given-names> </name><name name-style="western"><surname>Breakstone</surname><given-names>J</given-names> </name><name name-style="western"><surname>Ortega</surname><given-names>T</given-names> </name><name name-style="western"><surname>Smith</surname><given-names>M</given-names> </name><name name-style="western"><surname>Wineburg</surname><given-names>S</given-names> </name></person-group><article-title>Can students evaluate online sources? 
Learning from assessments of civic online reasoning</article-title><source>Theory &#x0026; Research in Social Education</source><year>2018</year><month>04</month><day>3</day><volume>46</volume><issue>2</issue><fpage>165</fpage><lpage>193</lpage><pub-id pub-id-type="doi">10.1080/00933104.2017.1416320</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Feufel</surname><given-names>MA</given-names> </name><name name-style="western"><surname>Stahl</surname><given-names>SF</given-names> </name></person-group><article-title>What do web-use skill differences imply for online health information searches?</article-title><source>J Med Internet Res</source><year>2012</year><month>06</month><day>13</day><volume>14</volume><issue>3</issue><fpage>e87</fpage><pub-id pub-id-type="doi">10.2196/jmir.2051</pub-id><pub-id pub-id-type="medline">22695686</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sun</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Gwizdka</surname><given-names>J</given-names> </name><name name-style="western"><surname>Trace</surname><given-names>CB</given-names> </name></person-group><article-title>Consumer evaluation of the quality of online health information: systematic literature review of relevant criteria and indicators</article-title><source>J Med Internet Res</source><year>2019</year><month>05</month><day>2</day><volume>21</volume><issue>5</issue><fpage>e12522</fpage><pub-id pub-id-type="doi">10.2196/12522</pub-id><pub-id pub-id-type="medline">31045507</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Dadaczynski</surname><given-names>K</given-names> </name><name name-style="western"><surname>Okan</surname><given-names>O</given-names> </name><name name-style="western"><surname>Messer</surname><given-names>M</given-names> </name><etal/></person-group><article-title>Digital health literacy and web-based information-seeking behaviors of university students in Germany during the COVID-19 pandemic: cross-sectional survey study</article-title><source>J Med Internet Res</source><year>2021</year><month>01</month><day>15</day><volume>23</volume><issue>1</issue><fpage>e24097</fpage><pub-id pub-id-type="doi">10.2196/24097</pub-id><pub-id pub-id-type="medline">33395396</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>van der Vaart</surname><given-names>R</given-names> </name><name name-style="western"><surname>Drossaert</surname><given-names>C</given-names> </name></person-group><article-title>Development of the Digital Health Literacy Instrument: measuring a broad spectrum of Health 1.0 and Health 2.0 skills</article-title><source>J Med Internet Res</source><year>2017</year><month>01</month><day>24</day><volume>19</volume><issue>1</issue><fpage>e27</fpage><pub-id pub-id-type="doi">10.2196/jmir.6709</pub-id><pub-id pub-id-type="medline">28119275</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sieck</surname><given-names>CJ</given-names> </name><name name-style="western"><surname>Sheon</surname><given-names>A</given-names> </name><name name-style="western"><surname>Ancker</surname><given-names>JS</given-names> </name><name name-style="western"><surname>Castek</surname><given-names>J</given-names> </name><name name-style="western"><surname>Callahan</surname><given-names>B</given-names> 
</name><name name-style="western"><surname>Siefer</surname><given-names>A</given-names> </name></person-group><article-title>Digital inclusion as a social determinant of health</article-title><source>NPJ Digit Med</source><year>2021</year><month>03</month><day>17</day><volume>4</volume><issue>1</issue><fpage>52</fpage><pub-id pub-id-type="doi">10.1038/s41746-021-00413-8</pub-id><pub-id pub-id-type="medline">33731887</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Rodriguez</surname><given-names>JA</given-names> </name><name name-style="western"><surname>Clark</surname><given-names>CR</given-names> </name><name name-style="western"><surname>Bates</surname><given-names>DW</given-names> </name></person-group><article-title>Digital health equity as a necessity in the 21st Century Cures Act era</article-title><source>JAMA</source><year>2020</year><month>06</month><day>16</day><volume>323</volume><issue>23</issue><fpage>2381</fpage><lpage>2382</lpage><pub-id pub-id-type="doi">10.1001/jama.2020.7858</pub-id><pub-id pub-id-type="medline">32463421</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ward</surname><given-names>M</given-names> </name><name name-style="western"><surname>Gruppen</surname><given-names>L</given-names> </name><name name-style="western"><surname>Regehr</surname><given-names>G</given-names> </name></person-group><article-title>Measuring self-assessment: current state of the art</article-title><source>Adv Health Sci Educ Theory Pract</source><year>2002</year><volume>7</volume><issue>1</issue><fpage>63</fpage><lpage>80</lpage><pub-id pub-id-type="doi">10.1023/a:1014585522084</pub-id><pub-id pub-id-type="medline">11912336</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation 
citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Flanagin</surname><given-names>AJ</given-names> </name><name name-style="western"><surname>Metzger</surname><given-names>MJ</given-names> </name></person-group><article-title>The role of site features, user attributes, and information verification behaviors on the perceived credibility of web-based information</article-title><source>New Media &#x0026; Society</source><year>2007</year><month>04</month><volume>9</volume><issue>2</issue><fpage>319</fpage><lpage>342</lpage><pub-id pub-id-type="doi">10.1177/1461444807075015</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kienhues</surname><given-names>D</given-names> </name><name name-style="western"><surname>Stadtler</surname><given-names>M</given-names> </name><name name-style="western"><surname>Bromme</surname><given-names>R</given-names> </name></person-group><article-title>Dealing with conflicting or consistent medical information on the web: when expert information breeds laypersons&#x2019; doubts about experts</article-title><source>Learn Instr</source><year>2011</year><month>04</month><volume>21</volume><issue>2</issue><fpage>193</fpage><lpage>204</lpage><pub-id pub-id-type="doi">10.1016/j.learninstruc.2010.02.004</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Betsch</surname><given-names>C</given-names> </name><name name-style="western"><surname>Renkewitz</surname><given-names>F</given-names> </name><name name-style="western"><surname>Betsch</surname><given-names>T</given-names> </name><name name-style="western"><surname>Ulsh&#x00F6;fer</surname><given-names>C</given-names> </name></person-group><article-title>The influence of vaccine-critical websites on 
perceiving vaccination risks</article-title><source>J Health Psychol</source><year>2010</year><month>04</month><volume>15</volume><issue>3</issue><fpage>446</fpage><lpage>455</lpage><pub-id pub-id-type="doi">10.1177/1359105309353647</pub-id><pub-id pub-id-type="medline">20348365</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Pirolli</surname><given-names>P</given-names> </name><name name-style="western"><surname>Fu</surname><given-names>WT</given-names> </name><name name-style="western"><surname>Reeder</surname><given-names>R</given-names> </name><name name-style="western"><surname>Card</surname><given-names>SK</given-names> </name></person-group><article-title>A user-tracing architecture for modeling interaction with the world wide web</article-title><conf-name>AVI&#x2019;02</conf-name><conf-date>May 22-24, 2002</conf-date><conf-loc>Trento, Italy</conf-loc><fpage>75</fpage><lpage>83</lpage><pub-id pub-id-type="doi">10.1145/1556262.1556272</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Eysenbach</surname><given-names>G</given-names> </name><name name-style="western"><surname>Powell</surname><given-names>J</given-names> </name><name name-style="western"><surname>Kuss</surname><given-names>O</given-names> </name><name name-style="western"><surname>Sa</surname><given-names>ER</given-names> </name></person-group><article-title>Empirical studies assessing the quality of health information for consumers on the world wide web: a systematic review</article-title><source>JAMA</source><year>2002</year><volume>287</volume><issue>20</issue><fpage>2691</fpage><lpage>2700</lpage><pub-id pub-id-type="doi">10.1001/jama.287.20.2691</pub-id><pub-id pub-id-type="medline">12020305</pub-id></nlm-citation></ref><ref 
id="ref19"><label>19</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Jonassen</surname><given-names>DH</given-names> </name></person-group><article-title>Toward a design theory of problem solving</article-title><source>ETR&#x0026;D</source><year>2000</year><month>12</month><volume>48</volume><issue>4</issue><fpage>63</fpage><lpage>85</lpage><pub-id pub-id-type="doi">10.1007/BF02300500</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Boothby</surname><given-names>C</given-names> </name><name name-style="western"><surname>Murray</surname><given-names>D</given-names> </name><name name-style="western"><surname>Waggy</surname><given-names>AP</given-names> </name><name name-style="western"><surname>Tsou</surname><given-names>A</given-names> </name><name name-style="western"><surname>Sugimoto</surname><given-names>CR</given-names> </name></person-group><article-title>Credibility of scientific information on social media: variation by platform, genre and presence of formal credibility cues</article-title><source>Quantitative Science Studies</source><year>2021</year><month>11</month><day>5</day><volume>2</volume><issue>3</issue><fpage>845</fpage><lpage>863</lpage><pub-id pub-id-type="doi">10.1162/qss_a_00151</pub-id></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Rains</surname><given-names>SA</given-names> </name><name name-style="western"><surname>Karmikel</surname><given-names>CD</given-names> </name></person-group><article-title>Health information-seeking and perceptions of website credibility: examining Web-use orientation, message characteristics, and structural features of websites</article-title><source>Comput Human 
Behav</source><year>2009</year><month>03</month><volume>25</volume><issue>2</issue><fpage>544</fpage><lpage>553</lpage><pub-id pub-id-type="doi">10.1016/j.chb.2008.11.005</pub-id></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ayani</surname><given-names>S</given-names> </name><name name-style="western"><surname>Sadoughi</surname><given-names>F</given-names> </name><name name-style="western"><surname>Jabari</surname><given-names>R</given-names> </name><name name-style="western"><surname>Moulaei</surname><given-names>K</given-names> </name><name name-style="western"><surname>Ashrafi-Rizi</surname><given-names>H</given-names> </name></person-group><article-title>Evaluation criteria for health websites: critical review</article-title><source>Front Health Inform</source><year>2020</year><month>09</month><day>12</day><volume>9</volume><issue>1</issue><fpage>44</fpage><pub-id pub-id-type="doi">10.30699/fhi.v9i1.235</pub-id></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Pirolli</surname><given-names>P</given-names> </name></person-group><source>Information Foraging Theory: Adaptive Interaction with Information</source><year>2007</year><publisher-name>Oxford University Press</publisher-name><pub-id pub-id-type="other">9780199893232</pub-id></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Pirolli</surname><given-names>P</given-names> </name><name name-style="western"><surname>Card</surname><given-names>SK</given-names> </name></person-group><article-title>Information foraging</article-title><source>Psychol Rev</source><year>1999</year><volume>106</volume><issue>4</issue><fpage>643</fpage><lpage>675</lpage><pub-id 
pub-id-type="doi">10.1037//0033-295X.106.4.643</pub-id></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Nwagwu</surname><given-names>W</given-names> </name></person-group><article-title>Thirty-two years of research on information foraging theory: evolution, key contributions and emerging directions</article-title><source>MJLIS</source><year>2024</year><month>12</month><day>30</day><volume>29</volume><issue>3</issue><fpage>117</fpage><lpage>143</lpage><pub-id pub-id-type="doi">10.22452/mjlis.vol29no3.6</pub-id></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Pirolli</surname><given-names>P</given-names> </name><name name-style="western"><surname>Fu</surname><given-names>WT</given-names> </name></person-group><article-title>SNIF-ACT: a model of information foraging on the world wide web</article-title><conf-name>UM&#x2019;03: Proceedings of the 9th International Conference on User Modeling</conf-name><conf-date>Jun 22-26, 2003</conf-date><conf-loc>Berlin, Heidelberg</conf-loc><fpage>45</fpage><lpage>54</lpage><pub-id pub-id-type="doi">10.1007/3-540-44963-9_8</pub-id></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="web"><person-group person-group-type="author"><name name-style="western"><surname>Z&#x00F6;rg&#x0151;</surname><given-names>S</given-names> </name><name name-style="western"><surname>Jeney</surname><given-names>A</given-names> </name><name name-style="western"><surname>Peters</surname><given-names>GJ</given-names> </name><name name-style="western"><surname>Ruis</surname><given-names>A</given-names> </name><name name-style="western"><surname>Kov&#x00E1;cs</surname><given-names>SD</given-names> </name><name name-style="western"><surname>Crutzen</surname><given-names>R</given-names> 
</name></person-group><source>Smart Online Searching To Increase Patient Safety (SOS-TIPS)</source><access-date>2025-06-23</access-date><publisher-name>OSF</publisher-name><comment><ext-link ext-link-type="uri" xlink:href="https://osf.io/ynt7a/">https://osf.io/ynt7a/</ext-link></comment></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Cooper</surname><given-names>C</given-names> </name><name name-style="western"><surname>Lorenc</surname><given-names>T</given-names> </name><name name-style="western"><surname>Schauberger</surname><given-names>U</given-names> </name></person-group><article-title>What you see depends on where you sit: the effect of geographical location on web-searching for systematic reviews: a case study</article-title><source>Res Synth Methods</source><year>2021</year><month>07</month><volume>12</volume><issue>4</issue><fpage>557</fpage><lpage>570</lpage><pub-id pub-id-type="doi">10.1002/jrsm.1485</pub-id><pub-id pub-id-type="medline">33713573</pub-id></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Czaja</surname><given-names>SJ</given-names> </name><name name-style="western"><surname>Charness</surname><given-names>N</given-names> </name><name name-style="western"><surname>Fisk</surname><given-names>AD</given-names> </name><etal/></person-group><article-title>Factors predicting the use of technology: findings from the Center for Research and Education on Aging and Technology Enhancement (CREATE)</article-title><source>Psychol Aging</source><year>2006</year><month>06</month><volume>21</volume><issue>2</issue><fpage>333</fpage><lpage>352</lpage><pub-id pub-id-type="doi">10.1037/0882-7974.21.2.333</pub-id><pub-id pub-id-type="medline">16768579</pub-id></nlm-citation></ref><ref 
id="ref30"><label>30</label><nlm-citation citation-type="web"><person-group person-group-type="author"><name name-style="western"><surname>Z&#x00F6;rg&#x0151;</surname><given-names>S</given-names> </name><name name-style="western"><surname>Crutzen</surname><given-names>R</given-names> </name><name name-style="western"><surname>Jeney</surname><given-names>A</given-names> </name><name name-style="western"><surname>Peters</surname><given-names>GJ</given-names> </name><name name-style="western"><surname>Ruis</surname><given-names>A</given-names> </name><name name-style="western"><surname>Kov&#x00E1;cs</surname><given-names>SD</given-names> </name></person-group><article-title>Codebook</article-title><source>OSF</source><access-date>2025-06-23</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://osf.io/hu28c">https://osf.io/hu28c</ext-link></comment></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="web"><person-group person-group-type="author"><name name-style="western"><surname>Peters</surname><given-names>GJ</given-names> </name><name name-style="western"><surname>Z&#x00F6;rg&#x0151;</surname><given-names>S</given-names> </name></person-group><article-title>Rock: Reproducible Open Coding Kit. 
R package version 0.9.7</article-title><year>2019</year><access-date>2025-06-23</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://rock.opens.science">https://rock.opens.science</ext-link></comment></nlm-citation></ref><ref id="ref32"><label>32</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Shaffer</surname><given-names>D</given-names> </name></person-group><source>Quantitative Ethnography</source><year>2017</year><publisher-name>Cathcart Press</publisher-name><pub-id pub-id-type="other">978-0578191683</pub-id></nlm-citation></ref><ref id="ref33"><label>33</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Z&#x00F6;rg&#x0151;</surname><given-names>S</given-names> </name><name name-style="western"><surname>Peters</surname><given-names>GJ</given-names> </name><name name-style="western"><surname>Jeney</surname><given-names>A</given-names> </name><name name-style="western"><surname>Shaffer</surname><given-names>DW</given-names> </name><name name-style="western"><surname>Ruis</surname><given-names>AR</given-names> </name><name name-style="western"><surname>Crutzen</surname><given-names>R</given-names> </name></person-group><article-title>A feasibility study for a unified, multimodal analysis of online information foraging in health-related topics</article-title><source>Open Res Eur</source><year>2023</year><volume>3</volume><issue>98</issue><fpage>98</fpage><pub-id pub-id-type="doi">10.12688/openreseurope.16119.2</pub-id><pub-id pub-id-type="medline">38655132</pub-id></nlm-citation></ref><ref id="ref34"><label>34</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Burla</surname><given-names>L</given-names> </name><name name-style="western"><surname>Knierim</surname><given-names>B</given-names> </name><name 
name-style="western"><surname>Barth</surname><given-names>J</given-names> </name><name name-style="western"><surname>Liewald</surname><given-names>K</given-names> </name><name name-style="western"><surname>Duetz</surname><given-names>M</given-names> </name><name name-style="western"><surname>Abel</surname><given-names>T</given-names> </name></person-group><article-title>From text to codings: intercoder reliability assessment in qualitative content analysis</article-title><source>Nurs Res</source><year>2008</year><volume>57</volume><issue>2</issue><fpage>113</fpage><lpage>117</lpage><pub-id pub-id-type="doi">10.1097/01.NNR.0000313482.33917.7d</pub-id><pub-id pub-id-type="medline">18347483</pub-id></nlm-citation></ref><ref id="ref35"><label>35</label><nlm-citation citation-type="web"><article-title>An interface for the ROCK: irock</article-title><source>The ROCK</source><access-date>2025-06-23</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://i.rock.science">https://i.rock.science</ext-link></comment></nlm-citation></ref><ref id="ref36"><label>36</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Logan</surname><given-names>RK</given-names> </name></person-group><article-title>What is information?: Why is it relativistic and what is its relationship to materiality, meaning and organization</article-title><source>Information</source><year>2012</year><month>03</month><volume>3</volume><issue>1</issue><fpage>68</fpage><lpage>91</lpage><pub-id pub-id-type="doi">10.3390/info3010068</pub-id></nlm-citation></ref><ref id="ref37"><label>37</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kedrick</surname><given-names>K</given-names> </name><name name-style="western"><surname>Schrater</surname><given-names>P</given-names> </name><name name-style="western"><surname>Koutstaal</surname><given-names>W</given-names> 
</name></person-group><article-title>The multifaceted role of self-generated question asking in curiosity-driven learning</article-title><source>Cogn Sci</source><year>2023</year><month>04</month><volume>47</volume><issue>4</issue><fpage>e13253</fpage><pub-id pub-id-type="doi">10.1111/cogs.13253</pub-id><pub-id pub-id-type="medline">37012694</pub-id></nlm-citation></ref><ref id="ref38"><label>38</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Shah</surname><given-names>C</given-names> </name><name name-style="western"><surname>Bender</surname><given-names>EM</given-names> </name></person-group><article-title>Envisioning information access systems: what makes for good tools and a healthy web?</article-title><source>ACM Trans Web</source><year>2024</year><month>08</month><day>31</day><volume>18</volume><issue>3</issue><fpage>1</fpage><lpage>24</lpage><pub-id pub-id-type="doi">10.1145/3649468</pub-id></nlm-citation></ref><ref id="ref39"><label>39</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ntuen</surname><given-names>CA</given-names> </name><name name-style="western"><surname>Park</surname><given-names>EH</given-names> </name><name name-style="western"><surname>Kim</surname><given-names>GM</given-names> </name></person-group><article-title>Designing an information visualization tool for sensemaking</article-title><source>Int J Hum Comput Interact</source><year>2010</year><month>03</month><day>10</day><volume>26</volume><issue>2-3</issue><fpage>189</fpage><lpage>205</lpage><pub-id pub-id-type="doi">10.1080/10447310903498825</pub-id></nlm-citation></ref><ref id="ref40"><label>40</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Rapaport</surname><given-names>LG</given-names> </name><name 
name-style="western"><surname>Brown</surname><given-names>GR</given-names> </name></person-group><article-title>Social influences on foraging behavior in young nonhuman primates: learning what, where, and how to eat</article-title><source>Evol Anthropol</source><year>2008</year><month>07</month><volume>17</volume><issue>4</issue><fpage>189</fpage><lpage>201</lpage><pub-id pub-id-type="doi">10.1002/evan.20180</pub-id></nlm-citation></ref><ref id="ref41"><label>41</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hargittai</surname><given-names>E</given-names> </name></person-group><article-title>Beyond logs and surveys: in&#x2010;depth measures of people&#x2019;s web use skills</article-title><source>J Am Soc Inf Sci Technol</source><year>2002</year><month>12</month><volume>53</volume><issue>14</issue><fpage>1239</fpage><lpage>1244</lpage><pub-id pub-id-type="doi">10.1002/asi.10166</pub-id></nlm-citation></ref></ref-list></back></article>