<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id><journal-id journal-id-type="publisher-id">jmir</journal-id><journal-id journal-id-type="index">1</journal-id><journal-title>Journal of Medical Internet Research</journal-title><abbrev-journal-title>J Med Internet Res</abbrev-journal-title><issn pub-type="epub">1438-8871</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v28i1e86249</article-id><article-id pub-id-type="doi">10.2196/86249</article-id><article-categories><subj-group subj-group-type="heading"><subject>Original Paper</subject></subj-group></article-categories><title-group><article-title>The Open Syndrome Definition as a Machine-Readable Standard for Public Health: Design and Implementation Study</article-title></title-group><contrib-group><contrib contrib-type="author"><name name-style="western"><surname>Ferreira</surname><given-names>Ana Paula Gomes</given-names></name><degrees>MSc</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>An&#x017E;el</surname><given-names>Aleksandar</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Marcilio</surname><given-names>Izabel</given-names></name><degrees>Dr med</degrees><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author"><name 
name-style="western"><surname>Hughes</surname><given-names>Helen</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff4">4</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Elliot</surname><given-names>Alex J</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff4">4</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Kong</surname><given-names>Jude Dzevela</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff5">5</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Schranz</surname><given-names>Madlen</given-names></name><degrees>MSc</degrees><xref ref-type="aff" rid="aff6">6</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Ullrich</surname><given-names>Alexander</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff6">6</xref></contrib><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Hattab</surname><given-names>Georges</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff2">2</xref></contrib></contrib-group><aff id="aff1"><institution>Center for Artificial Intelligence in Public Health Research, Robert Koch Institute</institution><addr-line>Nordufer 20</addr-line><addr-line>Berlin</addr-line><country>Germany</country></aff><aff id="aff2"><institution>Department of Mathematics and Computer Science, Freie Universit&#x00E4;t Berlin</institution><addr-line>Berlin</addr-line><country>Germany</country></aff><aff id="aff3"><institution>Center for Data and Knowledge Integration for Health, Funda&#x00E7;&#x00E3;o Oswaldo Cruz</institution><addr-line>Salvador</addr-line><country>Brazil</country></aff><aff id="aff4"><institution>Real-time Syndromic Surveillance Team, UK Health Security Agency</institution><addr-line>Birmingham</addr-line><country>United Kingdom</country></aff><aff 
id="aff5"><institution>Department of Mathematics and Statistics, Africa-Canada Artificial Intelligence and Data Innovation Consortium, York University</institution><addr-line>Toronto</addr-line><addr-line>ON</addr-line><country>Canada</country></aff><aff id="aff6"><institution>Infectious Disease Epidemiology Department, Robert Koch Institute</institution><addr-line>Berlin</addr-line><country>Germany</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Stone</surname><given-names>Alicia</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Emonet</surname><given-names>Vincent</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Tomasovic</surname><given-names>Zeljka</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Georges Hattab, PhD, Center for Artificial Intelligence in Public Health Research, Robert Koch Institute, Nordufer 20, Berlin, D-13353, Germany, +49 30 18754 0, +49 30 18754 2328; <email>HattabG@rki.de</email></corresp></author-notes><pub-date pub-type="collection"><year>2026</year></pub-date><pub-date pub-type="epub"><day>18</day><month>6</month><year>2026</year></pub-date><volume>28</volume><elocation-id>e86249</elocation-id><history><date date-type="received"><day>21</day><month>10</month><year>2025</year></date><date date-type="rev-recd"><day>29</day><month>04</month><year>2026</year></date><date date-type="accepted"><day>29</day><month>04</month><year>2026</year></date></history><copyright-statement>&#x00A9; Ana Paula Gomes Ferreira, Aleksandar An&#x017E;el, Izabel Marcilio, Helen Hughes, Alex J Elliot, Jude Dzevela Kong, Madlen Schranz, Alexander Ullrich, Georges Hattab. Originally published in the Journal of Medical Internet Research (<ext-link ext-link-type="uri" xlink:href="https://www.jmir.org">https://www.jmir.org</ext-link>), 18.6.2026. 
</copyright-statement><copyright-year>2026</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research (ISSN 1438-8871), is properly cited. The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://www.jmir.org/">https://www.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://www.jmir.org/2026/1/e86249"/><abstract><sec><title>Background</title><p>Case definitions are essential for effectively communicating public health threats. However, the absence of a standardized, machine-readable format poses significant challenges to interoperability, epidemiological research, data sharing, and the application of computational methods, including artificial intelligence. These barriers complicate collaboration across regions and organizations and hinder technological progress in public health.</p></sec><sec><title>Objective</title><p>This study aims to propose and release the first open, machine-readable format for representing case and syndrome definitions, together with tools and resources that enable their standardized and scalable use.</p></sec><sec sec-type="methods"><title>Methods</title><p>We developed the Open Syndrome Definition, a structured, machine-readable schema for representing case and syndrome definitions. 
We compiled official public health case definitions from multiple institutions and converted them into standardized, machine-readable representations using open-source tools. These tools, available through GitHub under the Massachusetts Institute of Technology license, automate the translation of narrative definitions into structured data. We also created a platform for browsing, analyzing, and contributing new definitions on our initiative website.</p></sec><sec sec-type="results"><title>Results</title><p>The Open Syndrome Definition format enabled consistent, automated representation of case definitions across different diseases and jurisdictions. The conversion tools achieved high semantic fidelity, as assessed by qualitative expert review, between narrative and structured representations, supporting human verification and automated analysis. The dataset and accompanying tools demonstrated structural and semantic interoperability by standardizing definitions from various health systems into a unified format and integrating existing medical ontologies through JSON for Linked Data. To further illustrate practical applicability and downstream usage, we introduced a data filtering prototype that allows users to upload their own datasets and verify the results against the standardized definitions.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>The Open Syndrome Definition establishes a foundation for consistent and machine-readable public health definitions, facilitating reproducible research and interoperability at scale. 
By enabling systematic data exchange and artificial intelligence&#x2013;driven analysis, it strengthens public health preparedness and supports more rapid, coordinated responses to emerging health threats.</p></sec></abstract><kwd-group><kwd>public health surveillance</kwd><kwd>epidemiological monitoring</kwd><kwd>health information exchange</kwd><kwd>public health</kwd><kwd>AI</kwd><kwd>data standardization</kwd><kwd>artificial intelligence</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>Case definitions are essential tools for public health practitioners. They are used to identify, monitor, and respond to diseases or groups of diseases [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref2">2</xref>]. They inform the public, orient public health policies, and guide surveillance indicators, such as syndrome definitions [<xref ref-type="bibr" rid="ref3">3</xref>]. In the context of public health surveillance, terminology can often be fragmented. While case definitions typically describe strict clinical and laboratory criteria for identifying specific diseases, the criteria used for early warning systems are referred to in the literature as either syndrome definitions or syndromic indicators. Due to the lack of global consensus [<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref5">5</xref>], these 2 terms are frequently used interchangeably in practice; therefore, throughout this paper, we treat them as synonymous.</p><p>Developing case definitions requires expert knowledge of the targeted disease(s), consultation of existing definitions, and analysis of available clinical data. Case definitions are usually written as free-text descriptions of the key characteristics and criteria of the target disease or public health threat. 
The goal of a case definition is to provide a consistent description for public health officials, health workers, policymakers, and the general public so they can understand a given threat. <xref ref-type="fig" rid="figure1">Figure 1</xref> shows two case definitions for the same disease issued by different sources [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref7">7</xref>]. The lack of interoperability and standardization of syndromic indicators and case definitions makes it difficult to compare systems and epidemiological situations across different regions and time periods [<xref ref-type="bibr" rid="ref8">8</xref>]. Free-text definitions, in particular, introduce a fundamental gap in translation between human and machine interpretation, resulting in inconsistent and error-prone automated processing [<xref ref-type="bibr" rid="ref9">9</xref>]. Beyond mere structural inconsistencies, free-text descriptions are highly susceptible to linguistic ambiguity and semantic fragmentation [<xref ref-type="bibr" rid="ref10">10</xref>]. While complex logical relationships and clinical nuances appear straightforward to trained public health professionals, they become deeply problematic for computational systems.</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>Measles case definition comparison. The definitions from the European Centre for Disease Prevention and Control [<xref ref-type="bibr" rid="ref6">6</xref>] and India [<xref ref-type="bibr" rid="ref7">7</xref>] are presented on the left and right, respectively. 
ECDC: European Centre for Disease Prevention and Control.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="jmir_v28i1e86249_fig01.png"/></fig><p>Consequently, effectively translating these definitions for automated surveillance requires more than just a technical data container; it necessitates a robust methodology to resolve semantic ambiguities and ensure true interoperability across diverse health systems [<xref ref-type="bibr" rid="ref11">11</xref>]. Examples of such complex logical relationships are conditions and nested criteria. These inconsistencies can delay coordinated responses during critical time periods when containing emerging threats is essential. For example, inconsistent case definitions had far-reaching consequences for global disease surveillance and control efforts during the COVID-19 pandemic. The consequences of these issues include underreporting and misclassifications of cases [<xref ref-type="bibr" rid="ref12">12</xref>], compromised accuracy of surveillance data [<xref ref-type="bibr" rid="ref13">13</xref>-<xref ref-type="bibr" rid="ref15">15</xref>], compromised resource allocation [<xref ref-type="bibr" rid="ref16">16</xref>], and problems comparing disease burden and intervention effectiveness across countries [<xref ref-type="bibr" rid="ref17">17</xref>]. The variability in case definitions and the absence of structured formats to standardize them represent a surprisingly underexplored gap in public health infrastructure. Moreover, the dearth of structured formats imposes substantial constraints on embracing artificial intelligence (AI) and eventually implementing AI for surveillance and outbreak detection. As health systems increasingly rely on computational approaches, a significant challenge to the use of AI as a technology for protecting public health is posed by the gap between narrative definitions and machine-readable formats. 
When disease indicators are monitored in real-time or near real-time for the purpose of early outbreak detection, it is imperative to use a structured format for case definitions. This is especially true in the context of syndromic surveillance, where automated data acquisition is used [<xref ref-type="bibr" rid="ref18">18</xref>].</p><p>Earlier studies [<xref ref-type="bibr" rid="ref19">19</xref>,<xref ref-type="bibr" rid="ref20">20</xref>] verify that using a structured format for clinical standards in case definitions leads to enhanced reporting accuracy compared to narrative descriptions. These studies emphasize that even well-defined case definitions can be interpreted differently by various users, which can lead to misclassifications, decreased sensitivity, and lower positive predictive value. A highlight from the related work is when case definitions were successfully used to automate the generation of a new case definition and translate an existing one from medical conditions [<xref ref-type="bibr" rid="ref21">21</xref>]. Notwithstanding this evidence, at the time of writing, no comprehensive effort has been made to develop a standardized, machine-readable format for case definitions. Adopting this format would eliminate any inconsistencies in interpretation and lay the groundwork for the infrastructure needed to support next-generation public health surveillance systems.</p><p>As a matter of fact, clearly structured definitions can help AI systems identify diseases and track outbreaks more effectively. This enables machine learning (ML) programs to detect cases more accurately, notice subtle changes in disease patterns, and alert us to emerging health threats, even when traditional symptoms are not yet evident.</p><p>To achieve these capabilities, the core scientific contribution of this work is the development of a methodology for semantic interoperability in public health surveillance. 
As the practical implementation of this methodology, we introduce the Open Syndrome Definition (OSD) framework, a name chosen to reflect its capacity to encompass both case and syndrome definitions. Moving beyond basic structural formatting, our approach leverages a machine-readable representation integrated with semantic web principles (JSON for Linked Data [JSON-LD]) [<xref ref-type="bibr" rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref23">23</xref>] and established medical ontologies, successfully bridging the gap between human readability and computational precision. Consequently, OSD offers the flexibility and clarity required to create precise, unambiguous descriptions suitable for AI applications and diverse software tools. Furthermore, by incorporating metadata often missing from traditional narratives, the framework enables the reuse of definitions, cross-jurisdictional comparisons, version control, and downstream applications in ML projects, benchmarks, and publications. Importantly, OSD complements, rather than replaces, traditional narrative structures. By systematically disambiguating epidemiological criteria, it allows computational systems to precisely interpret case definitions, facilitating automated classification and seamless integration into modern digital surveillance pipelines.</p><p>In conjunction with the OSD format, this work presents the inaugural dataset of its kind: a collection of machine-readable case definitions for a plethora of diseases from a network of countries spanning 5 continents: the Americas, Europe, Africa, Oceania, and Asia. True interoperability between jurisdictions, reproducible research, and a foundation for more responsive, data-driven approaches to disease surveillance and public health emergency management are all enabled by this common &#x201C;language.&#x201D;</p><p>Case definitions play a critical role in public health and epidemiological research. 
However, they often suffer from ambiguity and a lack of standardization. This limits their utility for computational processing and interoperability. This study addresses the critical gap in translation between human-readable and machine-processable case definitions. To accomplish this, the study has three main goals: (1) to create the OSD, a standardized, machine-readable format that preserves the logical complexity of case definitions while eliminating ambiguity, (2) to enable the conversion of the machine-readable format to free text and vice versa, which is a necessary operational feature, and (3) to compile the first extensive dataset of structured case definitions that covers various diseases and jurisdictions. This study aims to support future advances in public health surveillance, syndromic surveillance, and broader epidemiological research by pursuing these goals.</p></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Overview</title><p>This section describes the methodology used to develop the OSD format, dataset, and supporting tools. We took an iterative approach to the development process. The format evolved through continuous refinement as we collected case definitions, which became our dataset, and created supporting tools. Although this section is organized into distinct subsections for clarity, these components were developed concurrently and informed each other throughout the research process.</p></sec><sec id="s2-2"><title>Definitions Dataset</title><p>Case definitions have specific guidelines regarding writing style. These guidelines emphasize simplicity and conciseness and encourage the use of a narrative format [<xref ref-type="bibr" rid="ref1">1</xref>]. 
Many governmental [<xref ref-type="bibr" rid="ref24">24</xref>-<xref ref-type="bibr" rid="ref26">26</xref>] and public health organizations [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref27">27</xref>] publicly share their case definitions to ensure that health workers and the public can access information about monitored diseases and their characteristics.</p><p>To identify scientific papers mentioning datasets related to case or syndrome definitions, we searched for the keywords <italic>case definition AND dataset</italic> and <italic>syndrome definition AND dataset</italic> across various scientific sources, as shown in <xref ref-type="table" rid="table1">Table 1</xref>. The choice of terms reflects the core concepts of interest, namely, case definitions and their association with publicly available or structured datasets. Our goal was to cast a broad net without overly constraining the results, so we used simple, inclusive phrases and applied no filters based on time or language. However, despite these efforts, we were unable to identify any relevant datasets.</p><p><xref ref-type="table" rid="table1">Table 1</xref> summarizes the results of searches conducted in 2 scientific article databases (PubMed and OpenAlex) and 3 dataset platforms: Kaggle, Hugging Face, and Harvard Dataverse. 
The searches yielded 286 results for &#x201C;case definition dataset&#x201D; and 12 results for &#x201C;syndrome definition dataset,&#x201D; but none of these were actual datasets for case or syndrome definitions.</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Search queries for case and syndrome definition datasets accessed on March 24, 2025.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Source</td><td align="left" valign="bottom">&#x201C;case definition&#x201D; AND &#x201C;dataset&#x201D;</td><td align="left" valign="bottom">&#x201C;syndrome definition&#x201D; AND &#x201C;dataset&#x201D;</td></tr></thead><tbody><tr><td align="left" valign="top">PubMed [<xref ref-type="bibr" rid="ref28">28</xref>]</td><td align="left" valign="top">53 results</td><td align="left" valign="top">0 results</td></tr><tr><td align="left" valign="top">OpenAlex [<xref ref-type="bibr" rid="ref29">29</xref>]</td><td align="left" valign="top">213 results</td><td align="left" valign="top">12 results</td></tr><tr><td align="left" valign="top">Kaggle [<xref ref-type="bibr" rid="ref30">30</xref>]</td><td align="left" valign="top">4 results</td><td align="left" valign="top">0 results</td></tr><tr><td align="left" valign="top">HuggingFace [<xref ref-type="bibr" rid="ref31">31</xref>]</td><td align="left" valign="top">2 results</td><td align="left" valign="top">0 results</td></tr><tr><td align="left" valign="top">Harvard Dataverse [<xref ref-type="bibr" rid="ref32">32</xref>]</td><td align="left" valign="top">14 results</td><td align="left" valign="top">0 results</td></tr><tr><td align="left" valign="top">Total</td><td align="left" valign="top">286 results</td><td align="left" valign="top">12 results</td></tr><tr><td align="left" valign="top">Confirmed datasets</td><td align="left" valign="top">0</td><td align="left" valign="top">0</td></tr></tbody></table></table-wrap><p>Because our initial search yielded no 
relevant datasets, we took a targeted approach to compile case and syndrome definitions for our dataset. Our methodology was a multistep heuristic process.</p><p>First, we focused exclusively on World Health Organization (WHO) member countries. Second, to ensure balanced representation, we aimed to include countries from each of the following continents: the Americas, Europe, Africa, Oceania, and Asia. Third, for each country, we began with the search query &#x003C;country name&#x003E; AND case definitions; if no relevant results were found, we successively tried &#x003C;country name&#x003E; AND syndromic surveillance definitions and &#x003C;country name&#x003E; AND syndrome definitions. Fourth, we aimed to include at least one definition from at least 10% of the countries within each continent. Once this target was met, we moved on to the next country and then the next continent. When a country provided multiple definitions on a single webpage or PDF, we selected a disease or group of diseases accordingly. DuckDuckGo was our primary search engine. For every successful find, we exported the web page or PDF file featuring case definitions. Since a page or file may contain multiple definitions, we extracted the text to create a machine-readable version. Among WHO member countries, Japan, Indonesia, Russia, and Cuba had no public definitions.</p><p>Ultimately, we collected a total of 40 case definitions. This collection comprises 36 national and regional definitions that collectively represent 60 countries. This coverage is reached primarily because a single regional definition from the Pacific Public Health Surveillance Network accounts for a group of 22 nations. 
Of the remaining 4 definitions, 3 are from continental organizations (the Pan American Health Organization, the European Centre for Disease Prevention and Control, and the Africa Centers for Disease Control and Prevention) and 1 is from the WHO as a global entity.</p><p>As we gathered definitions, common themes surfaced across various diseases and regions, including symptoms, diseases, epidemiological links, laboratory tests, and medical evaluations. We also recognized recurring logical patterns in how definitions combined criteria. Operators like AND, OR, and especially AT LEAST were frequently used. The AT LEAST operator called for a specific number of criteria to be met, as illustrated in definitions such as &#x201C;fever AND at least two symptoms from: cough, headache, loss of smell, back pain.&#x201D; Through careful analysis of existing definitions and relevant scientific literature [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref13">13</xref>], we pinpointed important metadata elements that, while often found in the broader context of portals and publications, were not consistently included with the definitions themselves. These insights led to the continuous development of our OSD format, which we cover in detail in the Schema subsection.</p><p>The dataset is organized into sections for human and machine readability. The human-readable section contains original PDF publications, which may cover multiple diseases, as well as TXT files that contain case definitions converted into our format. To ensure consistency, all web-published definitions were exported to PDF. The machine-readable section includes JSON representations of these definitions that have been validated with the Ajv validator [<xref ref-type="bibr" rid="ref33">33</xref>]. 
The dataset is available on the Open Syndrome Initiative (OSI) page at Hugging Face and GitHub.</p></sec><sec id="s2-3"><title>Schema</title><p>The OSD format converts traditional, narrative case definitions into a structured JSON schema with defined properties and types. These properties were derived through an in-depth review of a wide range of existing case definitions to ensure they reflect common structural and semantic elements found in real-world usage. When analyzing case definitions of different public health threats from various countries and continents, we identified consistent patterns in information groups. More details are provided in the Definitions Dataset subsection. We observed that the majority (217/407, 53%) relied on symptoms to describe conditions, followed by diagnosis (34/407, 8%). Additionally, half of the case definitions included criteria that resembled logical operators (such as AND, OR, and AT LEAST) to group related conditions. Based on these observations, we developed a nested structure using a data-driven approach combined with our established principles. As illustrated in <xref ref-type="fig" rid="figure2">Figure 2</xref>, we organized the format into 2 main groups of information: metadata and criteria. We describe both groups of information below.</p><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>The human- and machine-readable case definitions of influenza-like illness from the Ministry of Health of Brazil are in the Open Syndrome Definition format. The human-readable case definition is shown in the gray box on the right. The machine-readable definition is on the left and uses the JSON format proposed by this work. 
There are two additional boxes on the sides: the green upper box indicates the metadata fields, and the pink box outlines the inclusion and exclusion criteria.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="jmir_v28i1e86249_fig02.png"/></fig><p>First, the metadata information group provides essential context about the definition itself. This includes version information, scope (broad or sensitive vs narrow or specific), publication details, the responsible organization, the language used, and other provenance information. Typically, in narrative formats, this metadata is presented implicitly within published documents or websites (eg, European Centre for Disease Prevention and Control case definitions [<xref ref-type="bibr" rid="ref6">6</xref>], WHO Outbreak Toolkit [<xref ref-type="bibr" rid="ref34">34</xref>]). We enabled efficient information retrieval, version tracking, and proper attribution by explicitly structuring this information in a machine-readable format. Furthermore, the metadata information group consists of metadata properties that capture essential contextual information about the case definition, ranging from basic identification (title and description) to publication details (published_in, published_at, and authors) and geographical context (location and language). We included properties for tracking the status and version of the case definition itself and the OSD schema. Most metadata elements are derived directly from the original case definitions, though several properties, such as Open Syndrome Version, $schema, and the JSON-LD @context, are specific to our format&#x2019;s technical infrastructure and initiative, which we discuss further in the OSI section. 
These and other individual metadata properties and their meaning can be seen in <xref ref-type="table" rid="table2">Table 2</xref>.</p><p><xref ref-type="table" rid="table2">Table 2</xref> outlines the first-level properties of the Open Syndrome Definition format, dividing them into metadata and criteria. The metadata category contains information about the format and syndrome, including location, organization, and version. The criteria category contains structured information that defines the syndrome itself.</p><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Breakdown of the format by group of information types.</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Key components and their properties</td><td align="left" valign="bottom">Description</td></tr></thead><tbody><tr><td align="left" valign="top" colspan="2">Metadata</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>$schema</td><td align="left" valign="top">URI<sup><xref ref-type="table-fn" rid="table2fn1">a</xref></sup> pointing to the JSON<sup><xref ref-type="table-fn" rid="table2fn2">b</xref></sup> schema that validates this document</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>@context</td><td align="left" valign="top">JSON-LD<sup><xref ref-type="table-fn" rid="table2fn2">b</xref>,<xref ref-type="table-fn" rid="table2fn3">c</xref></sup> context mapping for semantic interoperability</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>@type</td><td align="left" valign="top">Semantic class (eg, OSD<sup><xref ref-type="table-fn" rid="table2fn4">d</xref></sup>: case definition or OSD<sup><xref ref-type="table-fn" rid="table2fn4">d</xref></sup>: syndrome definition)</td></tr><tr><td align="left" valign="top"><named-content 
content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>ID</td><td align="left" valign="top">Unique ID from the OSI<sup><xref ref-type="table-fn" rid="table2fn5">e</xref></sup></td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Title</td><td align="left" valign="top">Case definition title</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Description</td><td align="left" valign="top">A detailed description of the definition</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Human-readable definition</td><td align="left" valign="top">A human-readable description of the case definition for user interfaces</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Scope</td><td align="left" valign="top">Level of specificity. 
Options: broad or specific</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Created at</td><td align="left" valign="top">Date when the definition was created</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Published in</td><td align="left" valign="top">Source or platform where the definition was published</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Published at</td><td align="left" valign="top">Date and time in UTC<sup><xref ref-type="table-fn" rid="table2fn6">f</xref></sup> when the definition was published</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Published by</td><td align="left" valign="top">List of case definition publishers in the OSI<sup><xref ref-type="table-fn" rid="table2fn5">e</xref></sup></td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Authors</td><td align="left" valign="top">List of the case definition authors</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Location</td><td align="left" valign="top">Geographical location relevant to the schema&#x2019;s application</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Language</td><td align="left" valign="top">Language in which the definition is written (eg, English and Spanish)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Organization</td><td align="left" valign="top">Organization or initiative responsible for the schema maintenance</td></tr><tr><td align="left" 
valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Status</td><td align="left" valign="top">Current status of the definition. Options: draft, published, and deprecated</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Keywords</td><td align="left" valign="top">Keywords related to a definition (eg, COVID-19, mpox, and outbreaks)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Target public health threats</td><td align="left" valign="top">List of public health threats that this definition targets</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Notes</td><td align="left" valign="top">Any notes that may be relevant to this definition</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Category</td><td align="left" valign="top">Case definition categories [<xref ref-type="bibr" rid="ref1">1</xref>]. Options: confirmed, probable, and suspected</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Version</td><td align="left" valign="top">Case definition version to be established by the author</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Open Syndrome version</td><td align="left" valign="top">Open Syndrome Definition schema version. 
Currently: v1 (version 1)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Definition type</td><td align="left" valign="top">Case definition or syndromic indicator</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Surveillance system type</td><td align="left" valign="top">Type of surveillance system for which this definition is intended</td></tr><tr><td align="left" valign="top" colspan="2">Criterion</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Criterion (meta-type)</td><td align="left" valign="top">Set of properties and operators to select a case (details in <xref ref-type="table" rid="table3">Table 3</xref>)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Inclusion criteria (criterion)</td><td align="left" valign="top">Criterion used to include a case</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Exclusion criteria (criterion)</td><td align="left" valign="top">Criterion used to exclude a case</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>References</td><td align="left" valign="top">Scientific references that have supported this definition</td></tr></tbody></table><table-wrap-foot><fn id="table2fn1"><p><sup>a</sup>URI: uniform resource identifier.</p></fn><fn id="table2fn2"><p><sup>b</sup>JSON: JavaScript Object Notation.</p></fn><fn id="table2fn3"><p><sup>c</sup>LD: linked data.</p></fn><fn id="table2fn4"><p><sup>d</sup>OSD: Open Syndrome Definition.</p></fn><fn id="table2fn5"><p><sup>e</sup>OSI: Open Syndrome Initiative.</p></fn><fn id="table2fn6"><p><sup>f</sup>UTC: Coordinated Universal 
Time</p></fn></table-wrap-foot></table-wrap><p>Second, the inclusion and exclusion criteria specify the conditions that determine whether a case meets the definition or should be excluded. These conditions, referred to as criteria, can be combined using logical operators (AND, OR, and AT_LEAST) to express complex clinical relationships. We organized the data into key and value pairs (properties) within the JSON schema, enabling computational systems to interpret simple and compound conditions precisely. This structured approach eliminated ambiguities often present in narrative text [<xref ref-type="bibr" rid="ref35">35</xref>] while preserving the clinical intent of the original definitions. The criteria properties form the backbone of the format and are structured to capture the logical relationships inherent in case definitions. The format distinguishes between inclusion and exclusion criteria, both built upon our criterion meta-type, as described in <xref ref-type="table" rid="table3">Table 3</xref>. This structure enables the representation of complex clinical reasoning within a machine-readable framework.</p><table-wrap id="t3" position="float"><label>Table 3.</label><caption><p>Criterion meta-type properties (summary of properties of a criterion and a fundamental component of a definition).</p></caption><table id="table3" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Property</td><td align="left" valign="bottom">Description</td><td align="left" valign="bottom">Usage details</td></tr></thead><tbody><tr><td align="left" valign="top">Type</td><td align="left" valign="top">Type of criterion. 
Options: criterion, syndrome, symptom, diagnosis, diagnostic_test, professional_judgment, epidemiological_history, and demographic_criteria</td><td align="left" valign="top">The type property is mandatory, followed by name or values</td></tr><tr><td align="left" valign="top">Name</td><td align="left" valign="top">Criterion label</td><td align="left" valign="top">&#x2014;<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup></td></tr><tr><td align="left" valign="top">Description</td><td align="left" valign="top">Detailed description of the criterion</td><td align="left" valign="top">&#x2014;</td></tr><tr><td align="left" valign="top">Ontology ID</td><td align="left" valign="top">The compacted identifier from an ontology (eg, Disease Ontology [DOID])</td><td align="left" valign="top">Examples: hpo:0002045 and mondo:0020674. Facilitates semantic interoperability</td></tr><tr><td align="left" valign="top">Logical operator</td><td align="left" valign="top">Keywords that represent a logical operation on criteria. Options: AND, OR, and AT_LEAST</td><td align="left" valign="top">The logical operator AT_LEAST must be used with the number specified in logical_operator_arguments</td></tr><tr><td align="left" valign="top">Logical operator arguments</td><td align="left" valign="top">List of arguments to be passed to the logical operator</td><td align="left" valign="top">&#x2014;</td></tr><tr><td align="left" valign="top">Attribute</td><td align="left" valign="top">The referred attribute, for example, body temperature, age, and onset</td><td align="left" valign="top">It is used in composition with operator and value</td></tr><tr><td align="left" valign="top">Value</td><td align="left" valign="top">The reference value for the referred attribute. It could be of any data type because it can represent anything in the real world</td><td align="left" valign="top">Examples: true, 37.6, abnormal but non-specific bowel gas pattern. 
This property is used in composition with attribute and operator</td></tr><tr><td align="left" valign="top">Operator</td><td align="left" valign="top">Comparison and matching operators. Options: &#x003E;, &#x003E;=, &#x003C;, &#x003C;=, ==, !=, regex</td><td align="left" valign="top">It is used in composition with value and attribute</td></tr><tr><td align="left" valign="top">Regex pattern</td><td align="left" valign="top">Regular expression for evaluation and pattern matching</td><td align="left" valign="top">It is used in composition with operator</td></tr><tr><td align="left" valign="top">Regex flags</td><td align="left" valign="top">Regular expression flags for extra configuration</td><td align="left" valign="top">It is used in composition with regex_pattern</td></tr><tr><td align="left" valign="top">Code</td><td align="left" valign="top">A system-agnostic diagnosis code object that holds system, code, and display</td><td align="left" valign="top">Useful to represent values from the <italic>ICD</italic><sup><xref ref-type="table-fn" rid="table3fn2">b</xref></sup>, SNOMED CT<sup><xref ref-type="table-fn" rid="table3fn3">c</xref></sup>, and others</td></tr><tr><td align="left" valign="top">Values</td><td align="left" valign="top">A list whose types are a criterion</td><td align="left" valign="top">The criteria items should be unique</td></tr></tbody></table><table-wrap-foot><fn id="table3fn1"><p><sup>a</sup>Not applicable.</p></fn><fn id="table3fn2"><p><sup>b</sup><italic>ICD</italic>: <italic>International Statistical Classification of Diseases and Related Health Problems</italic>.</p></fn><fn id="table3fn3"><p><sup>c</sup>SNOMED CT: Systematized Nomenclature of Medicine&#x2014;Clinical Terms.</p></fn></table-wrap-foot></table-wrap><p>To ensure true semantic interoperability, the OSD format is designed as a valid JSON-LD document. JSON-LD allows a standard JSON file to act as a graph of linked data simply by defining a context. 
Each definition includes a @context field that explicitly maps the schema properties to universally accepted medical ontologies and vocabularies, such as Schema.org [<xref ref-type="bibr" rid="ref36">36</xref>], BioSchemas [<xref ref-type="bibr" rid="ref37">37</xref>], and the Human Phenotype Ontology (HPO) [<xref ref-type="bibr" rid="ref38">38</xref>]. This transformation gives explicit semantic meaning to each string field, allowing automated systems to interpret the exact biological or epidemiological concept behind the text [<xref ref-type="bibr" rid="ref39">39</xref>]. Furthermore, to guarantee long-term machine readability, every generated JSON file includes a $schema property pointing to a persistent, community-governed uniform resource identifier hosted via the W3C permanent identifier infrastructure (eg, [<xref ref-type="bibr" rid="ref40">40</xref>]). This approach provides a stable reference to the validation schema regardless of future repository migrations.</p><p>To capture the logical nature of the collected definitions, we implemented a flexible attribute-operator-value pattern. This pattern can accommodate diverse clinical observations ranging from simple Boolean conditions to complex pattern matching through regular expressions. The format supports numerical comparisons (&#x003E;, &#x003E;=, &#x003C;, &#x003C;=, ==, and !=) and text pattern matching (eg, regular expressions through the regex operator), enabling precise representation of quantitative thresholds and textual patterns. Please note that logical operators are available at the criteria level (see the Schema section). 
To handle semantic mapping, the format provides two specialized properties: ontology_id is used to represent compacted uniform resource identifiers for broad medical concepts (eg, HPO or MONDO Disease Ontology [MONDO] identifiers), while the code object is used to capture specific, system-agnostic clinical billing or diagnostic codes from systems like <italic>ICD</italic> (<italic>International Statistical Classification of Diseases and Related Health Problems</italic>) [<xref ref-type="bibr" rid="ref41">41</xref>] or SNOMED CT (Systematized Nomenclature of Medicine&#x2014;Clinical Terms) [<xref ref-type="bibr" rid="ref42">42</xref>].</p><p>The criterion structure can be defined recursively. The values property enables criteria to be nested within other criteria. This design allows us to represent complex logical relationships, ranging from simple symptom lists to intricate decision trees with multiple levels of criteria. With this property structure, we strike a balance between human readability and machine interpretability. We preserve the clinical logic of narrative case definitions while enabling computational processing and analysis.</p></sec><sec id="s2-4"><title>OSD Format</title><p>Our schema was developed iteratively, resulting in the creation of the OSD format. We designed the format to include both inclusion and exclusion criteria, reflecting common patterns found in various case definitions. We extracted essential metadata components, such as location, publication date, title, authors, and citation details, from the publishing websites and accompanying papers. This framework for metadata ensures proper attribution and provides contextual information for each definition.</p><p>We manually adapted the case definition texts into JSON format and used validation tools to verify their structural integrity and compliance. 
Transforming the texts from human-readable to machine-readable format revealed additional patterns and edge cases, which informed iterative refinements to the format. Throughout this iterative development process, we prioritized flexibility while maintaining structural consistency. This allowed the format to represent definitions across varied health systems, geographical regions, and clinical contexts.</p></sec><sec id="s2-5"><title>Tooling</title><p>As we developed the OSD format and dataset, we realized that supporting tools were necessary to facilitate the conversion of human-readable case definitions into machine-readable formats. We developed a Python-based toolkit to streamline this process, enabling researchers and public health professionals to efficiently translate traditional text-based case definitions into structured JSON representations, and vice versa. The toolkit used Python version 3.11 (Python Software Foundation).</p><p>We implemented a conversion utility that leverages large language models (LLMs) to automate the structuring process. To accommodate different user preferences and computational resources, the tool supports local, privacy-preserving deployment via Ollama [<xref ref-type="bibr" rid="ref43">43</xref>] (version 0.9.6), as well as direct application programming interface (API) integration with major cloud providers, including OpenAI, Anthropic, Google Gemini, Mistral, and DeepSeek. We systematically evaluated multiple local models, including llama-3.2 (Meta) [<xref ref-type="bibr" rid="ref44">44</xref>], mistral-7b (Mistral AI) [<xref ref-type="bibr" rid="ref45">45</xref>], deepseek-r1 (DeepSeek; both 7b and 8b variants) [<xref ref-type="bibr" rid="ref46">46</xref>], medllama2 (Siraj Raval) [<xref ref-type="bibr" rid="ref47">47</xref>], and qwen2.5-coder (Alibaba Cloud) [<xref ref-type="bibr" rid="ref48">48</xref>]. 
We evaluated the performance of the models based on their ability to comprehend medical terminology accurately and remain faithful to the original clinical meaning. Of the models evaluated, llama-3.2, mistral-7b, and deepseek-r1 demonstrated adequate performance for the conversion task. Mistral-7b was selected as the recommended default due to its optimal balance of accuracy and resource requirements.</p><p>Additionally, we developed a reverse conversion function that transformed machine-readable JSON syndrome definitions into human-readable formats that support multiple languages. This bidirectional conversion capability ensures accessibility for diverse user groups and facilitates international collaboration in syndromic surveillance systems.</p><p>To evaluate the structural and semantic interoperability of the OSD format, we developed a data filtering prototype designed to process tabular health datasets against our standardized JSON-LD schema [<xref ref-type="bibr" rid="ref49">49</xref>].</p></sec><sec id="s2-6"><title>OSI</title><p>To promote the adoption and ongoing development of the OSD format, we created the OSI, a collaborative community platform. The OSI serves as the central hub for all OSD-related resources and provides an infrastructure for sharing, collaboration, and knowledge exchange. We cover the methodology behind the website and its functionalities in the following text.</p><p>First, to ensure transparency and promote collaboration, we developed the OSI website using open-source tools and libraries. We use Hugo [<xref ref-type="bibr" rid="ref50">50</xref>] (version v0.145.0) to create the static website, which is built using Markdown [<xref ref-type="bibr" rid="ref51">51</xref>] files. This allows other community members to easily improve or expand the website content with new blog posts or tutorials.</p><p>Second, the website provides a user-friendly contribution workflow for submitting new definitions. 
This functionality is enabled through pull requests or a simplified web interface, as depicted in <xref ref-type="fig" rid="figure3">Figure 3</xref>.</p><p>All contributions undergo a community review and are published on our website after receiving approval and technical validation. When a definition is submitted via the form, one of the maintainers will create a GitHub pull request on the user&#x2019;s behalf in our repository. Regardless of their technical skill level, the user can view the publicly available submission in our repository and provide feedback. This process helps us maintain a clear history of contributions and ensures that all definitions are version-controlled. The submission then follows the same process as any other pull request, including validation and community review. We developed the entire toolkit as open-source software and hosted it on GitHub to enable collaborative development and continuous improvement. To ensure quality and authenticity, we verify the institutional affiliations of first-time contributors.</p><p>We implemented the automated validation workflows using GitHub Actions to maintain quality and consistency. These workflows verify JSON schema compliance for each definition and validate new submissions against established formatting requirements. The workflow verifies the conformity of submitted JSON files with a predefined schema. This process ensures that the JSON structure is well-formed, that all required fields are present and correctly named according to the schema specifications, and that the values adhere to the expected data formats (eg, URLs and dates). We open-sourced the entire toolkit and hosted it on GitHub to enable community contributions and continuous improvement through collaborative development. Our workflow is straightforward: we use a GitHub repository as the central hub where all definitions are stored and managed. 
Anyone can submit a new definition through the submission contact form or directly via a GitHub pull request. Additionally, individuals can verify their affiliations and become recognized contributors. To do so, they must provide a valid email address and their organization&#x2019;s name through the verification form. Definitions submitted by verified contributors receive a verified tag.</p><p>Third, the website features an interactive graph visualization of the definitions dataset that we developed using D3 [<xref ref-type="bibr" rid="ref52">52</xref>] (version v7.9.0), an open-source JavaScript library for visualizing data, and D3 force-directed graph layout using velocity Verlet integration. This feature allows users to explore the dataset intuitively by panning, zooming, and viewing tooltips that display important information about each criterion. The visualization automatically updates to reflect changes in the underlying dataset, so users always see the most current information.</p><fig position="float" id="figure3"><label>Figure 3.</label><caption><p>Open Syndrome Initiative contribution workflow. PR: pull request.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="jmir_v28i1e86249_fig03.png"/></fig><p>Fourth and last, the web interface includes extra documentation with instructions and suggestions on how to read, work with, and upload case definitions. For those unfamiliar with the format, we provide extensive instructions on how to convert text-based definitions to the OSD format using our tooling.</p></sec><sec id="s2-7"><title>Ethical Considerations</title><p>This study was exempt from institutional ethics review as it exclusively uses publicly available case definitions published by governmental and public health organizations. 
No patient data, personal information, or human subjects were involved.</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title>Overview</title><p>This section examines the structure and characteristics of the case definition format. It introduces the first case definition dataset and its maintenance tools. Finally, it presents the OSI, a collaborative community built around this ecosystem.</p></sec><sec id="s3-2"><title>The OSD Format</title><p>The OSD format was designed to accurately preserve the clinical meaning of case definitions in a machine-readable representation. Examining various case definitions reveals that, despite differences in language and style, they consistently strive to be clear, simple, and concise [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref53">53</xref>]. We have also deliberately incorporated these essential qualities into our format.</p><p>The OSD format is a JSON schema, a standard for defining the structure and rules of JSON data. With a JSON schema, one can describe the overall structure of a definition, including its inclusion and exclusion criteria, metadata, properties, and more. The OSD format transforms traditional narrative case definitions into structured, machine-readable representations. The version used for the proposed OSD format is draft 2020&#x2010;12 [<xref ref-type="bibr" rid="ref54">54</xref>]. For example, a free-text definition for influenza-like illness (right side of <xref ref-type="fig" rid="figure2">Figure 2</xref>) becomes a structured JSON object (left side of <xref ref-type="fig" rid="figure2">Figure 2</xref>) specifying each criterion with precise logical relationships and the metadata of this definition.</p><p>This structured format explicitly defines signs, quantitative attributes, and temporal constraints, which could be ambiguous in a free-text narrative. 
Additionally, it incorporates metadata, such as provenance and version history, which is often missing from traditional formats. This structured approach enables computational systems to accurately interpret case definitions, thereby facilitating automated case classification, cross-jurisdictional comparison, and application in ML pipelines. Several core principles guided our design decisions.</p><list list-type="order"><list-item><p>Preservation of clinical meaning: Maintaining the integrity of case definitions while keeping a structured format and preserving essential clinical details.</p></list-item><list-item><p>AI Readiness: Designing with computational processing in mind to enable the large-scale analysis and application of AI.</p></list-item><list-item><p>Interoperability: Ensuring seamless integration with existing and future systems across different platforms, following the Findability, Accessibility, Interoperability, and Reusability (FAIR) principles [<xref ref-type="bibr" rid="ref55">55</xref>] and promoting interoperability [<xref ref-type="bibr" rid="ref56">56</xref>].</p></list-item><list-item><p>Openness and Accessibility: Developing a reusable, open format that is free to use and not tied to proprietary platforms.</p></list-item><list-item><p>Decentralization: Allowing for independent implementation across websites, scientific publications, and surveillance systems without central control.</p></list-item><list-item><p>Versionability: Supporting the evolution of definitions over time with clear tracking of changes.</p></list-item></list><p>The format addresses several critical challenges in the current landscape. These challenges include a lack of standardization in case definitions, potential ambiguity in text-based definitions, fragmented information caused by inconsistent metadata, and barriers to large-scale AI applications resulting from format inconsistencies. 
Even though the technical nature of the format might require some knowledge of JSON, we alleviate this by providing user-friendly tools described in the Tooling section.</p><p>This format has several promising applications, including testing definitions against electronic health record data, creating ML models for automated case detection, comparing disease definitions across countries, and enhancing reproducibility in epidemiological research.</p><p>While our collection of data effectively documented a wide array of diseases and methods of definition, we recognize the necessity for subsequent improvements. To that end, we included a version field in the format specification. This allows us to maintain backward compatibility and accommodate future improvements as the field evolves. This versioning approach allows the OSD format to adapt to emerging needs while preserving access to historical definitions. The resulting format balances machine readability with a faithful representation of the original clinical intent. This enables automated processing while preserving the essential diagnostic criteria established by public health authorities.</p></sec><sec id="s3-3"><title>Definitions Dataset</title><p>The first comprehensive dataset of machine-readable case definitions was developed, including definitions from 60 countries, 3 continental organizations, and one global organization. The dataset contains 40 case definitions for various diseases and is available in 3 formats: OSD JSON, plain text, and the original PDF publications. 
The definitions cover a wide range of disease categories: vector-borne illnesses (n=8), viral respiratory diseases (n=7), bacterial diseases and vaccine-preventable diseases (n=6 each), gastrointestinal diseases (n=5), hemorrhagic fevers (n=4), nervous system and other infectious diseases (n=2 each), and climate-related and noninfectious conditions (n=1 each).</p><p>To quantify the structural characteristics of the collected data, we assessed the logical complexity of the definitions within our dataset (<xref ref-type="fig" rid="figure4">Figure 4</xref>). Regarding logical operator usage, AND was the most frequent operator (n=44), followed by OR (n=33) and AT_LEAST (n=28). Regarding maximum nesting depth, most definitions had a depth of 3 (n=21), followed by depth 2 (n=11), depth 4 or more (n=7), and depth 1 (n=1). The analysis revealed that the majority of the definitions use up to three levels of nesting depth to represent their clinical criteria.</p><p>Measles is the most represented disease, with 4 definitions. Cholera, influenza-like illness, and COVID-19 each have two definitions. This likely reflects their status as priority conditions in national and international public health surveillance initiatives, in which consistent case definitions are crucial for monitoring morbidity and responding to outbreaks. 
The remaining 30 diseases, ranging from common conditions like dengue fever to rare diseases like Lujo hemorrhagic fever, are each represented by one definition.</p><fig position="float" id="figure4"><label>Figure 4.</label><caption><p>Distribution of maximum nesting depth and the frequency of logical operators in the machine-readable dataset definitions.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="jmir_v28i1e86249_fig04.png"/></fig><p>Additionally, we ensured geographic diversity by selecting at least 10% of countries from each continent, based on the number of countries represented in the United Nations. For example, the Pacific Islands are represented collectively through the PPHSN. This collaborative effort involves 22 Pacific Island countries and territories that develop case definitions together. Although using English for our search terms may have introduced some bias, the definitions were available in a range of languages, as depicted in <xref ref-type="fig" rid="figure5">Figure 5</xref>.</p><fig position="float" id="figure5"><label>Figure 5.</label><caption><p>Distribution of definitions by location and language.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="jmir_v28i1e86249_fig05.png"/></fig></sec><sec id="s3-4"><title>Data Filtering Prototype</title><p>To demonstrate the practical utility and downstream applicability, we successfully implemented a data filtering prototype as a proof of concept. 
This web-based tool, publicly accessible at the Open Syndrome Initiative web page [<xref ref-type="bibr" rid="ref57">57</xref>] (see <xref ref-type="fig" rid="figure6">Figure 6</xref>), enables users to upload their own tabular health datasets and apply our standardized case definitions to automatically identify matching patient records.</p><p>By leveraging the newly integrated JSON-LD schema, which maps definition criteria to established medical ontologies (eg, HPO and Disease Ontology), the prototype ensures semantic consistency even when processing data from diverse health systems.</p><p>This successful implementation validates the structural and semantic interoperability of our format, showcasing its capacity to facilitate automated, cross-platform epidemiological surveillance without ambiguity.</p><fig position="float" id="figure6"><label>Figure 6.</label><caption><p>The data filtering prototype allows users to upload a sample of their own data or interact with a toy dataset to see which cases are matched with Open Syndrome definitions available on the website.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="jmir_v28i1e86249_fig06.png"/></fig></sec><sec id="s3-5"><title>Tooling</title><p>To overcome barriers to adopting our machine-readable case definition format, we developed a Python command-line tool leveraging LLMs via Ollama to convert between traditional text definitions and our structured format [<xref ref-type="bibr" rid="ref43">43</xref>]. The library offers two main features: it automatically generates structured JSON representations from case definitions and converts JSON definitions back to human-readable text for verification and sharing.</p><p>The tool reliably converts between human- and machine-readable formats using either locally hosted LLMs (via Ollama) or prominent cloud-based models. 
We do not provide a quantitative evaluation of the tool, as this lies beyond the scope of the current work and reflects its auxiliary role in our study. However, our qualitative analysis indicates that Mistral [<xref ref-type="bibr" rid="ref45">45</xref>] yields the most reliable results. The tool demonstrates strong fidelity, avoiding hallucinations and accurately translating text into a machine-readable format. This qualitative assessment was conducted by two coauthors (APGF and AA). The library was also designed to be flexible, offering two distinct customization options for the conversion process.</p><p>First, users can select their preferred LLM for conversion. Second, users can specify their preferred output language, enabling multilingual text generation, which is essential for the international adoption of public health systems. By incorporating this flexibility into our library, we ensure local usability and minimize dependencies.</p><p>We made the tool available through the Python Package Index under the namespace <italic>opensyndrome</italic> and the project&#x2019;s GitHub repository, enabling integration into existing workflows. We aimed to bridge the gap between technical and nontechnical users by providing extensive documentation on the format, library, and database. This makes the OSD format and its accompanying tools and data accessible to health workers in organizations without dedicated data teams.</p></sec><sec id="s3-6"><title>OSI</title><p>The OSI is a central platform for sharing and accessing case definitions. The initiative&#x2019;s ecosystem facilitates the practical application of standardized case definitions in public health surveillance. It does so by providing a website where users can search, browse, and analyze definitions. The website displays and indexes definitions from the OSI GitHub repository, offering the most current versions and statistical information about the definitions. 
Users can contribute definitions or suggest modifications via GitHub pull requests or an online form. This dual approach ensures participation from technical and nontechnical users alike. To ensure high-quality dataset definitions, we implemented a two-stage validation process that combines automated static checks with manual semantic verification upon uploading new definitions. The first stage detects missing required fields, formatting inconsistencies, and invalid property types and provides immediate feedback for correction. The second stage involves manually reviewing each definition following the automated checks to semantically validate the uploaded data and ensure its accuracy. The OSI also provides supplementary resources, such as documentation and educational blog posts on best practices.</p><p>These resources [<xref ref-type="bibr" rid="ref58">58</xref>-<xref ref-type="bibr" rid="ref60">60</xref>], alongside the technical infrastructure, support a community of practice around standardized syndrome definitions and enhance global health surveillance capabilities.</p></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Principal Findings</title><p>This work introduces the OSD format, a novel approach to representing case definitions in machine-readable form. Alongside this format, we presented the first comprehensive dataset of case definitions and developed supporting tools through the OSI to foster a community of practitioners. These contributions address a significant gap in public health surveillance infrastructure, as standardized digital representations of case definitions have been notably absent.</p><p>Our findings demonstrate that the OSD format is flexible enough to accommodate case and syndrome definitions in various clinical contexts. The format&#x2019;s nested structure effectively captures the logical relationships present in traditional case definitions. 
Based on our curated dataset of structured definitions, we found that 53% (217/407) of the definitions primarily rely on symptom-based criteria. We assessed the logical complexity of each definition by analyzing the depth of nested Boolean expressions and observed that, although logical groupings were common, nesting rarely exceeded three levels. This suggests that complex clinical conditions can be represented within relatively constrained logical structures.</p><p>The development process revealed important insights about case definition structures internationally. The predominance of symptom-based criteria across geographical regions and disease categories indicates a universal approach to case definitions, despite variations in health care systems and resources. Additionally, the recurring use of logical operators across definitions from different origins indicates a natural convergence toward structured diagnostic thinking, which our format explicitly codifies.</p><p>Despite these promising results, our work has several limitations that warrant consideration. First, while the current dataset is diverse, it is relatively small, containing only 40 case definitions. Although our selective sampling technique covered all continents and major health care systems, the limited sample size may not capture the full range of definitions used globally. A further limitation of the current study lies in the evaluation of semantic fidelity during the conversion process. At this stage, the high semantic fidelity of the machine-readable definitions was assessed qualitatively through manual review by two domain experts. While this human-in-the-loop approach established a highly accurate baseline and ensured that clinical intent was preserved, it is inherently resource-intensive and difficult to scale. 
Furthermore, the lack of established quantitative benchmarks or baselines in this specific field makes comparative evaluation difficult.</p><p>We recognize that this initial, one-of-a-kind dataset will serve as a solid foundation for work ultimately developed by and for the community. This is why we developed additional tools to ensure a streamlined, user-friendly expansion process accessible to nonexperts. Additionally, the multilingual nature of our source definitions enriches the dataset&#x2019;s diversity but introduces complexity in accurately translating clinical concepts across language barriers. To address this issue, we have included a versioning field in the format of our open-source repositories. This enables the community to propose modifications or corrections for any errors or omissions.</p><p>Nevertheless, our work establishes the OSD format at the intersection of significant developments in public health informatics. Its JSON-based structure allows for seamless integration with AI and ML projects, enabling researchers to use these standardized definitions when developing and validating algorithms. As Wang et al [<xref ref-type="bibr" rid="ref61">61</xref>] noted in their work on automating case identification, structured case definitions can significantly improve the efficiency of surveillance systems. Similar applications have demonstrated that standardized case definitions enhance the assessment of information while alleviating the burden on physicians and clinic managers [<xref ref-type="bibr" rid="ref62">62</xref>-<xref ref-type="bibr" rid="ref65">65</xref>].</p><p>Our approach&#x2019;s interoperability means that any software capable of parsing JSON can work with these definitions, significantly lowering technical barriers to adoption. 
This universal accessibility promotes international collaboration and knowledge sharing, enabling public health officials to quickly adopt and implement definitions from other regions. This capability could substantially accelerate the establishment of effective surveillance systems during disease outbreaks, especially in settings with limited resources. While established health care interoperability standards, such as Fast Healthcare Interoperability Resources and Observational Medical Outcomes Partnership Common Data Model, provide comprehensive frameworks for structuring broad medical information, they often require substantial infrastructural investment, specialized technical expertise, and complex data mapping. These requirements can present significant barriers to entry, particularly for resource-constrained public health departments or during rapid, localized outbreak responses. In contrast, the OSD format is purposefully designed to be lightweight and accessible. By using a straightforward JSON-LD schema focused exclusively on epidemiological case definitions, it allows health workers and researchers to standardize their criteria immediately without needing to overhaul their existing IT systems. <xref ref-type="table" rid="table4">Table 4</xref> provides a structured comparison across key dimensions. The OSD format is not intended to replace these robust electronic health record standards; rather, it serves as an agile, domain-specific complement. It dramatically lowers the technical barrier to entry for epidemiologists while retaining the structural capacity to be mapped to Fast Healthcare Interoperability Resources (eg, Measure or PlanDefinition) for downstream institutional integration.</p><p>By grounding the OSD format in semantic web principles, our work directly connects to and builds upon large-scale existing efforts aimed at making medical diagnoses machine-readable. 
Initiatives such as the representation of rare diseases in BioSchemas and the MedicalCondition and MedicalCause classes in Schema.org have demonstrated the vast potential of linked data in health care. The OSD format complements these efforts by providing a specialized, deeply nested structural representation specifically tailored for public health surveillance and early warning systems. Future work will explore formally suggesting an epidemiological &#x201C;MedicalCase&#x201D; or &#x201C;Criterion&#x201D; class extension to the BioSchemas community, thereby contributing our public health modeling back to the broader semantic web ecosystem.</p><p>Looking ahead, the OSD format offers several promising avenues for future development. Users will be able to download definitions from various global sources and estimate the number of potential cases they could identify in their local data. This feature will speed up the development and adoption of locally optimized definitions and facilitate the implementation of ML models that leverage this standardized format. As more institutions contribute to and adopt this approach, we anticipate accelerated collaboration across borders and more responsive surveillance capabilities worldwide.</p><p>While our present efforts are concentrated on case definitions, there is also considerable potential to broaden the ready implementation of our methodology to syndromic surveillance. The principle of syndromic surveillance is to monitor patients presenting with symptoms, chief complaints, or other nonlaboratory or confirmatory diagnoses. Public health experts coordinating syndromic surveillance map individual clinical codes to more generic syndromic indicators. 
While these are not case definitions, as they are purely data- and code-based, similar problems exist with the consistency of syndromic indicators internationally.</p><p>Using OSD to present the code mappings that underpin syndromic indicators and to standardize these indicators internationally would support the global effort to coordinate syndromic surveillance between countries.</p><p>The versioning system in our format makes it easy to adapt to changing needs while also ensuring that older versions remain compatible. This addresses a crucial requirement for sustainable public health infrastructure. As the OSI expands its community of practitioners, we anticipate that the collective expertise will further refine the format and available definitions, thereby creating a robust ecosystem of standardized syndromic surveillance resources accessible to all.</p><p>In conclusion, the OSD format represents a significant step toward standardizing and digitizing case definitions for public health surveillance. It establishes a foundation for enhanced interoperability, collaboration, and automation in syndromic surveillance. The format&#x2019;s potential applications extend beyond traditional public health contexts into clinical research, health care delivery, and emerging disease response.</p><table-wrap id="t4" position="float"><label>Table 4.</label><caption><p>Comparison of Open Syndrome definition, CQL<sup><xref ref-type="table-fn" rid="table4fn1">a</xref></sup> or FHIR<sup><xref ref-type="table-fn" rid="table4fn2">b</xref></sup>, and OHDSI<sup><xref ref-type="table-fn" rid="table4fn3">c</xref></sup> Phenotype Library across key dimensions. CQL and the OHDSI Phenotype Library are mature, platform-coupled standards optimized for clinical care and retrospective research, respectively. 
The Open Syndrome definition distinguishes itself through minimal infrastructure requirements, a decoupled definition-execution architecture, and native support for cross-country interoperability and AI pipelines.</p></caption><table id="table4" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Dimension</td><td align="left" valign="bottom">CQL or FHIR</td><td align="left" valign="bottom">OHDSI phenotype library</td><td align="left" valign="bottom">Open Syndrome definition</td></tr></thead><tbody><tr><td align="left" valign="top">Required infrastructure</td><td align="left" valign="top">FHIR-compatible EHR<sup><xref ref-type="table-fn" rid="table4fn4">d</xref></sup> with CQL engine</td><td align="left" valign="top">OMOP<sup><xref ref-type="table-fn" rid="table4fn5">e</xref></sup> CDM<sup><xref ref-type="table-fn" rid="table4fn6">f</xref></sup> with ATLAS</td><td align="left" valign="top">JSON<sup><xref ref-type="table-fn" rid="table4fn7">g</xref></sup> parser; optional Python package (opensyndrome)</td></tr><tr><td align="left" valign="top">Format and execution</td><td align="left" valign="top">Domain-specific language;<break/>coupled to CQL engine</td><td align="left" valign="top">Platform-generated JSON; executed via HADES<sup><xref ref-type="table-fn" rid="table4fn8">h</xref></sup> (R)</td><td align="left" valign="top">JSON-LD<sup><xref ref-type="table-fn" rid="table4fn9">i</xref></sup>; decoupled from execution pipeline</td></tr><tr><td align="left" valign="top">Ontology support</td><td align="left" valign="top">SNOMED<sup><xref ref-type="table-fn" rid="table4fn10">j</xref></sup>, LOINC<sup><xref ref-type="table-fn" rid="table4fn11">k</xref></sup>, RxNorm<break/>via FHIR bindings</td><td align="left" valign="top">OMOP Vocabulary (Athena);<break/>locked to CDM</td><td align="left" valign="top">Agnostic; reuses HPO<sup><xref ref-type="table-fn" rid="table4fn12">l</xref></sup>,<break/>MONDO<sup><xref ref-type="table-fn" 
rid="table4fn13">m</xref></sup>, <italic>ICD</italic><sup><xref ref-type="table-fn" rid="table4fn14">n</xref></sup>, SNOMED via JSON-LD</td></tr><tr><td align="left" valign="top">Cross-country interoperability</td><td align="left" valign="top">Depends on FHIR adoption</td><td align="left" valign="top">Depends on CDM adoption</td><td align="left" valign="top">Primary design goal; explicit provenance and location metadata</td></tr><tr><td align="left" valign="top">Multilingual support</td><td align="left" valign="top">Partial</td><td align="left" valign="top">No</td><td align="left" valign="top">Yes</td></tr><tr><td align="left" valign="top">ML<sup><xref ref-type="table-fn" rid="table4fn15">o</xref></sup> or AI pipeline integration</td><td align="left" valign="top">Indirect via structured output</td><td align="left" valign="top">Via R with HADES package</td><td align="left" valign="top">Structured for direct ingestion<break/>into ML pipelines</td></tr></tbody></table><table-wrap-foot><fn id="table4fn1"><p><sup>a</sup>CQL: Clinical Quality Language.</p></fn><fn id="table4fn2"><p><sup>b</sup>FHIR: Fast Healthcare Interoperability Resources.</p></fn><fn id="table4fn3"><p><sup>c</sup>OHDSI: Observational Health Data Sciences and Informatics.</p></fn><fn id="table4fn4"><p><sup>d</sup>EHR: electronic health record.</p></fn><fn id="table4fn5"><p><sup>e</sup>OMOP: Observational Medical Outcomes Partnership.</p></fn><fn id="table4fn6"><p><sup>f</sup>CDM: Common Data Model.</p></fn><fn id="table4fn7"><p><sup>g</sup>JSON: JavaScript Object Notation.</p></fn><fn id="table4fn8"><p><sup>h</sup>HADES: Health-Analytics Data to Evidence Suite.</p></fn><fn id="table4fn9"><p><sup>i</sup>LD: linked data. 
</p></fn><fn id="table4fn10"><p><sup>j</sup>SNOMED: Systematized Nomenclature of Medicine &#x2013; Clinical Terms.</p></fn><fn id="table4fn11"><p><sup>k</sup>LOINC: Logical Observation Identifiers Names and Codes.</p></fn><fn id="table4fn12"><p><sup>l</sup>HPO: Human Phenotype Ontology.</p></fn><fn id="table4fn13"><p><sup>m</sup>MONDO: Mondo Disease Ontology.</p></fn><fn id="table4fn14"><p><sup>n</sup><italic>ICD</italic>: <italic>International Statistical Classification of Diseases and Related Health Problems</italic>. </p></fn><fn id="table4fn15"><p><sup>o</sup>ML: machine learning.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s4-2"><title>Future Work</title><p>While our current dataset provides a robust baseline for validating the OSD format, we recognize the potential for scaling the repository. A major technical challenge remains the heterogeneity of source PDFs, which makes fully automated bulk extraction prone to errors. To address this, future development will introduce specialized extraction skills within our tooling. These modules will use a 2-step pipeline to first identify multiple conditions within a single document and then structure them, keeping a human in the loop for quality assurance. To enhance the quality assessment process, we intend to adopt automated validation frameworks, such as leveraging advanced LLMs as evaluators (LLM-as-a-judge), to methodically and quantitatively evaluate semantic fidelity in conjunction with our community-driven review process. Furthermore, as an open-source initiative, the platform is now fully equipped to receive community contributions, allowing the dataset to grow organically and incorporate definitions discovered through advanced search methodologies.</p><p>To enhance the semantic precision of this extraction process, we plan to implement deterministic mapping to standardized clinical codes. 
While LLMs excel at generating the initial structural hierarchy, assigning precise ontological codes requires a more rigid approach. Our tooling architecture will be updated to integrate with biomedical ontology repositories, such as the National Center for Biomedical Ontology BioPortal API. Through this integration, terms extracted by the LLM can be automatically queried against standard vocabularies (eg, SNOMED CT or <italic>ICD-10</italic> [<italic>International Statistical Classification of Diseases and Related Health Problems Tenth Revision</italic>]) to retrieve the most accurate concept identifiers, which are then validated by human experts and populated into the format&#x2019;s code fields.</p><p>Additionally, to maximize the utility of the OSI platform, we intend to develop an interactive comparative tool. As the repository grows, this feature will allow researchers and public health officials to visually and logically compare case definitions for the same disease across different jurisdictions. This will facilitate the identification of surveillance discrepancies and support the harmonization of global syndromic indicators.</p><p>Finally, a significant future development is improving the AI-driven natural language processing techniques that transform narrative definitions into OSD-format representations. The local LLMs of the current system, which are accessed through Ollama, could be supplemented with BioBERT [<xref ref-type="bibr" rid="ref66">66</xref>] or ClinicalBERT [<xref ref-type="bibr" rid="ref67">67</xref>]. Additionally, the Structured Prompt Interrogation and Recursive Extraction of Semantics (SPIRES) framework [<xref ref-type="bibr" rid="ref68">68</xref>], which combines zero-shot LLM prompting with deterministic ontology grounding, could complement our current extraction pipeline by improving identifier precision for HPO and MONDO terms. 
These models are specifically trained on multilingual and multicultural clinical datasets, which could improve parsing precision and flexibility. Furthermore, AI anomaly detection techniques (eg, isolation forest and transformer-based systems) could be used to automate the validation and correction of definitions, thereby improving the continuous quality of the dataset. AI-federated approaches would also enable collaborative algorithm development across jurisdictions while ensuring data confidentiality, which is essential for international disease monitoring and surveillance. In public health, federated learning frameworks, such as Flower [<xref ref-type="bibr" rid="ref69">69</xref>] and PySyft [<xref ref-type="bibr" rid="ref70">70</xref>], could train and evaluate OSD format&#x2013;based ML models across regions without sharing sensitive health information. This would improve cross-border analysis, equity, fairness, and public health. By progressing in this way, the OSD format may advance from a defined structure to a foundation for collaborative model development.</p></sec><sec id="s4-3"><title>Conclusions</title><p>The OSD provides a flexible, interoperable, and machine-readable representation of case definitions. This innovation enables public health professionals, researchers, and technically proficient individuals to exchange epidemiological information more efficiently and consistently. It also enables the broader application of AI and machine learning techniques to public health data. Along with the format, this work introduces the first dataset of case definitions available in both human- and machine-readable formats. The dataset demonstrates the adaptability of the OSD format across various diseases and geographic settings, paving the way for global comparative analyses of case definition methodologies. 
To promote collaboration, we have launched the OSI, a platform where users can share definitions and access tools for converting between human- and machine-readable formats. Public health is inherently collaborative, and the OSD format contributes to this shared effort by promoting better data and more effective tools for preparing for and preventing public health threats.</p></sec></sec></body><back><ack><p>AJE is affiliated with the National Institute for Health and Care Research Health Protection Research Unit in Emergency Preparedness and Response at the University of Birmingham, as well as the National Institute for Health and Care Research Health Protection Research Unit in Gastrointestinal Infections at the University of East Anglia. The views expressed in this article are those of the authors and do not necessarily reflect the views of the National Institute for Health and Care Research, UK Health Security Agency, or the Department of Health and Social Care.</p></ack><notes><sec><title>Funding</title><p>The authors declared no financial support was received for this work.</p></sec><sec><title>Data Availability</title><p>All resources associated with this work are publicly available. The schema, case definitions, official website [<xref ref-type="bibr" rid="ref59">59</xref>], and the Python package are hosted under the Open Syndrome Initiative GitHub organization [<xref ref-type="bibr" rid="ref40">40</xref>]. The dataset of definitions, including JSON, TXT, and PDF files, is available on Hugging Face [<xref ref-type="bibr" rid="ref60">60</xref>]. 
The Python package can also be accessed via the Python Package Index [<xref ref-type="bibr" rid="ref58">58</xref>].</p></sec></notes><fn-group><fn fn-type="con"><p>APGF: Conceptualization, data curation, formal analysis, investigation, methodology, validation, visualization, writing - original draft.</p><p>AA: Conceptualization, visualization, writing - review and editing.</p><p>IM: Discussion, review, and editing.</p><p>HH: Discussion and review.</p><p>AJE: Discussion, review, writing and editing.</p><p>JDK: Discussion, review, writing and editing.</p><p>MS: Discussion, review and editing.</p><p>AU: Discussion, review and editing.</p><p>GH: Conceptualization, investigation, supervision, writing - review and editing.</p></fn><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">AI</term><def><p>artificial intelligence</p></def></def-item><def-item><term id="abb2">API</term><def><p>application programming interface</p></def></def-item><def-item><term id="abb3">FAIR</term><def><p>Findability, Accessibility, Interoperability, and Reusability</p></def></def-item><def-item><term id="abb4">HPO</term><def><p>Human Phenotype Ontology</p></def></def-item><def-item><term id="abb5"><italic>ICD</italic></term><def><p><italic>International Statistical Classification of Diseases and Related Health Problems</italic></p></def></def-item><def-item><term id="abb6"><italic>ICD-10</italic></term><def><p><italic>International Statistical Classification of Diseases and Related Health Problems Tenth Revision</italic></p></def></def-item><def-item><term id="abb7">JSON-LD</term><def><p>JSON for Linked Data</p></def></def-item><def-item><term id="abb8">LLM</term><def><p>large language model</p></def></def-item><def-item><term id="abb9">ML</term><def><p>machine learning</p></def></def-item><def-item><term id="abb10">MONDO</term><def><p>Mondo Disease Ontology</p></def></def-item><def-item><term 
id="abb11">OSD</term><def><p>Open Syndrome Definition</p></def></def-item><def-item><term id="abb12">OSI</term><def><p>Open Syndrome Initiative</p></def></def-item><def-item><term id="abb13">PPHSN</term><def><p>Pacific Public Health Surveillance Network</p></def></def-item><def-item><term id="abb14">SNOMED CT</term><def><p>Systematized Nomenclature of Medicine&#x2014;Clinical Terms</p></def></def-item><def-item><term id="abb15">SPIRES</term><def><p>Structured Prompt Interrogation and Recursive Extraction of Semantics</p></def></def-item><def-item><term id="abb16">WHO</term><def><p>World Health Organization</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Bassil</surname><given-names>K</given-names> </name></person-group><article-title>Case definition</article-title><source>Encyclopedia of Epidemiology</source><year>2008</year><volume>2</volume><publisher-name>SAGE Publications, Inc</publisher-name><fpage>133</fpage><lpage>134</lpage><pub-id pub-id-type="doi">10.4135/9781412953948</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Devlin</surname><given-names>RK</given-names> </name><name name-style="western"><surname>Andrews</surname><given-names>MM</given-names> </name><name name-style="western"><surname>von Reyn</surname><given-names>CF</given-names> </name></person-group><article-title>Recent trends in infective endocarditis: influence of case definitions</article-title><source>Curr Opin Cardiol</source><year>2004</year><month>03</month><volume>19</volume><issue>2</issue><fpage>134</fpage><lpage>139</lpage><pub-id pub-id-type="doi">10.1097/00001573-200403000-00012</pub-id><pub-id pub-id-type="medline">15075740</pub-id></nlm-citation></ref><ref 
id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lazarus</surname><given-names>R</given-names> </name><name name-style="western"><surname>Kleinman</surname><given-names>KP</given-names> </name><name name-style="western"><surname>Dashevsky</surname><given-names>I</given-names> </name><name name-style="western"><surname>DeMaria</surname><given-names>A</given-names> </name><name name-style="western"><surname>Platt</surname><given-names>R</given-names> </name></person-group><article-title>Using automated medical records for rapid identification of illness syndromes (syndromic surveillance): the example of lower respiratory infection</article-title><source>BMC Public Health</source><year>2001</year><volume>1</volume><issue>1</issue><fpage>9</fpage><pub-id pub-id-type="doi">10.1186/1471-2458-1-9</pub-id><pub-id pub-id-type="medline">11722798</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Chapman</surname><given-names>WW</given-names> </name><name name-style="western"><surname>Dowling</surname><given-names>JN</given-names> </name><name name-style="western"><surname>Baer</surname><given-names>A</given-names> </name><etal/></person-group><article-title>Developing syndrome definitions based on consensus and current use</article-title><source>J Am Med Inform Assoc</source><year>2010</year><volume>17</volume><issue>5</issue><fpage>595</fpage><lpage>601</lpage><pub-id pub-id-type="doi">10.1136/jamia.2010.003210</pub-id><pub-id pub-id-type="medline">20819870</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Purdy</surname><given-names>S</given-names> </name><name name-style="western"><surname>Griffin</surname><given-names>T</given-names> 
</name><name name-style="western"><surname>Salisbury</surname><given-names>C</given-names> </name><name name-style="western"><surname>Sharp</surname><given-names>D</given-names> </name></person-group><article-title>Ambulatory care sensitive conditions: terminology and disease coding need to be more specific to aid policy makers and clinicians</article-title><source>Public Health (Fairfax)</source><year>2009</year><month>02</month><volume>123</volume><issue>2</issue><fpage>169</fpage><lpage>173</lpage><pub-id pub-id-type="doi">10.1016/j.puhe.2008.11.001</pub-id><pub-id pub-id-type="medline">19144363</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="web"><article-title>EU case definitions</article-title><source>European Centre for Disease Prevention and Control</source><access-date>2026-05-22</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.ecdc.europa.eu/en/all-topics/eu-case-definitions">https://www.ecdc.europa.eu/en/all-topics/eu-case-definitions</ext-link></comment></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="web"><source>India case definitions - P form Case Definitions 2024</source><access-date>2026-06-10</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://web.archive.org/web/20240811013244/https://ihip.mohfw.gov.in/idsp/download?fileid=38">https://web.archive.org/web/20240811013244/https://ihip.mohfw.gov.in/idsp/download?fileid=38</ext-link></comment></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sanchez Ruiz</surname><given-names>MA</given-names> </name><name name-style="western"><surname>Marques</surname><given-names>DF</given-names> </name><name name-style="western"><surname>Lomholt</surname><given-names>FK</given-names> </name><etal/></person-group><article-title>Surveillance of severe acute respiratory infections 
associated with SARS-CoV-2, influenza virus and RSV using ICD-10 codes: a case definition accuracy study across five European countries, 2021 to 2023</article-title><source>Euro Surveill</source><year>2025</year><month>07</month><volume>30</volume><issue>27</issue><fpage>2400748</fpage><pub-id pub-id-type="doi">10.2807/1560-7917.ES.2025.30.27.2400748</pub-id><pub-id pub-id-type="medline">40642769</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Li</surname><given-names>I</given-names> </name><name name-style="western"><surname>Pan</surname><given-names>J</given-names> </name><name name-style="western"><surname>Goldwasser</surname><given-names>J</given-names> </name><etal/></person-group><article-title>Neural natural language processing for unstructured data in electronic health records: a review</article-title><source>Comput Sci Rev</source><year>2022</year><volume>46</volume><fpage>100511</fpage><pub-id pub-id-type="doi">10.1016/j.cosrev.2022.100511</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Wang</surname><given-names>N</given-names> </name><name name-style="western"><surname>Leem</surname><given-names>SG</given-names> </name></person-group><person-group person-group-type="editor"><name name-style="western"><surname>Ventura</surname><given-names>S</given-names> </name><name name-style="western"><surname>Luna</surname><given-names>JM</given-names> </name><name name-style="western"><surname>Mart&#x00ED;n-Casta&#x00F1;o</surname><given-names>ARM</given-names> </name></person-group><article-title>Modernizing data quality: evolving dimensions for unstructured text data in big data, AI and ethical contexts</article-title><source>Data Quality Matters - Best Practices for Integrity and 
Assurance</source><publisher-name>IntechOpen</publisher-name><pub-id pub-id-type="doi">10.5772/intechopen.1013232</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gomes Ferreira</surname><given-names>AP</given-names> </name><name name-style="western"><surname>An&#x017E;el</surname><given-names>A</given-names> </name><name name-style="western"><surname>Ullrich</surname><given-names>A</given-names> </name><name name-style="western"><surname>Hattab</surname><given-names>G</given-names> </name></person-group><article-title>Advocating the potential of artificial intelligence for syndrome discovery in syndromic surveillance systems: a scoping review</article-title><source>iScience</source><year>2026</year><month>03</month><day>20</day><volume>29</volume><issue>3</issue><fpage>115103</fpage><pub-id pub-id-type="doi">10.1016/j.isci.2026.115103</pub-id><pub-id pub-id-type="medline">41867626</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Koh</surname><given-names>D</given-names> </name><name name-style="western"><surname>Cunningham</surname><given-names>AC</given-names> </name></person-group><article-title>Counting coronavirus disease 2019 (COVID-19) cases: case definitions, screened populations and testing techniques matter</article-title><source>Ann Acad Med Singap</source><year>2020</year><month>03</month><volume>49</volume><issue>3</issue><fpage>161</fpage><lpage>165</lpage><pub-id pub-id-type="doi">10.47102/annals-acadmedsg.202038</pub-id><pub-id pub-id-type="medline">32301478</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gu&#x00E9;rin</surname><given-names>PJ</given-names> </name><name 
name-style="western"><surname>McLean</surname><given-names>ARD</given-names> </name><name name-style="western"><surname>Rashan</surname><given-names>S</given-names> </name><etal/></person-group><article-title>Definitions matter: heterogeneity of COVID-19 disease severity criteria and incomplete reporting compromise meta-analysis</article-title><source>PLOS Glob Public Health</source><year>2022</year><volume>2</volume><issue>7</issue><fpage>e0000561</fpage><pub-id pub-id-type="doi">10.1371/journal.pgph.0000561</pub-id><pub-id pub-id-type="medline">36962738</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Tsang</surname><given-names>TK</given-names> </name><name name-style="western"><surname>Wu</surname><given-names>P</given-names> </name><name name-style="western"><surname>Lin</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Lau</surname><given-names>EHY</given-names> </name><name name-style="western"><surname>Leung</surname><given-names>GM</given-names> </name><name name-style="western"><surname>Cowling</surname><given-names>BJ</given-names> </name></person-group><article-title>Effect of changing case definitions for COVID-19 on the epidemic curve and transmission parameters in mainland China: a modelling study</article-title><source>Lancet Public Health</source><year>2020</year><month>05</month><volume>5</volume><issue>5</issue><fpage>e289</fpage><lpage>e296</lpage><pub-id pub-id-type="doi">10.1016/S2468-2667(20)30089-X</pub-id><pub-id pub-id-type="medline">32330458</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Badker</surname><given-names>R</given-names> </name><name name-style="western"><surname>Miller</surname><given-names>K</given-names> </name><name 
name-style="western"><surname>Pardee</surname><given-names>C</given-names> </name><etal/></person-group><article-title>Challenges in reported COVID-19 data: best practices and recommendations for future epidemics</article-title><source>BMJ Glob Health</source><year>2021</year><month>05</month><volume>6</volume><issue>5</issue><fpage>e005542</fpage><pub-id pub-id-type="doi">10.1136/bmjgh-2021-005542</pub-id><pub-id pub-id-type="medline">33958393</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Raveendran</surname><given-names>AV</given-names> </name><name name-style="western"><surname>Joshi</surname><given-names>SR</given-names> </name></person-group><article-title>Evolution of a pandemic and changing concepts in the case definition</article-title><source>J Assoc Physicians India</source><year>2024</year><month>08</month><volume>72</volume><issue>8</issue><fpage>75</fpage><lpage>77</lpage><pub-id pub-id-type="doi">10.59556/japi.72.0606</pub-id><pub-id pub-id-type="medline">39163071</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="web"><person-group person-group-type="author"><name name-style="western"><surname>Suthar</surname><given-names>AB</given-names> </name><etal/></person-group><article-title>Coronavirus Disease Case Definitions, Diagnostic Testing Criteria, and Surveillance in 25 Countries with Highest Reported Case Counts</article-title><source>Emerg Infect Dis</source><year>2022</year><month>01</month><volume>28</volume><issue>1</issue><comment><ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3201/eid2801.211082">https://doi.org/10.3201/eid2801.211082</ext-link></comment><pub-id pub-id-type="doi">10.3201/eid2801.211082</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="web"><article-title>Overview of syndromic surveillance: what is syndromic surveillance?</article-title><source>Centers for Disease Control 
and Prevention</source><access-date>2026-06-10</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.cdc.gov/mmwr/preview/mmwrhtml/su5301a3.htm">https://www.cdc.gov/mmwr/preview/mmwrhtml/su5301a3.htm</ext-link></comment></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Krause</surname><given-names>G</given-names> </name><name name-style="western"><surname>Brodhun</surname><given-names>B</given-names> </name><name name-style="western"><surname>Altmann</surname><given-names>D</given-names> </name><name name-style="western"><surname>Claus</surname><given-names>H</given-names> </name><name name-style="western"><surname>Benzler</surname><given-names>J</given-names> </name></person-group><article-title>Reliability of case definitions for public health surveillance assessed by Round-Robin test methodology</article-title><source>BMC Public Health</source><year>2006</year><month>05</month><day>10</day><volume>6</volume><fpage>129</fpage><pub-id pub-id-type="doi">10.1186/1471-2458-6-129</pub-id><pub-id pub-id-type="medline">16686946</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ghodsi</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Rahimi Movaghar</surname><given-names>V</given-names> </name><name name-style="western"><surname>Zafarghandi</surname><given-names>M</given-names> </name><etal/></person-group><article-title>The minimum dataset and inclusion criteria for the national trauma registry of Iran: a qualitative study</article-title><source>Arch Trauma Res</source><year>2017</year><volume>6</volume><issue>2</issue><fpage>1</fpage><lpage>7</lpage><pub-id pub-id-type="doi">10.5812/atr.39725</pub-id></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation 
citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Botsis</surname><given-names>T</given-names> </name><name name-style="western"><surname>Ball</surname><given-names>R</given-names> </name></person-group><article-title>Automating case definitions using literature-based reasoning</article-title><source>Appl Clin Inform</source><year>2013</year><volume>4</volume><issue>4</issue><fpage>515</fpage><lpage>527</lpage><pub-id pub-id-type="doi">10.4338/ACI-2013-04-RA-0028</pub-id><pub-id pub-id-type="medline">24454579</pub-id></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="web"><article-title>JSON-LD 1.0: a JSON-based serialization for linked data</article-title><source>Traves&#x00ED;a</source><year>2014</year><access-date>2026-06-10</access-date><comment><ext-link ext-link-type="uri" xlink:href="http://hdl.handle.net/10421/7478">http://hdl.handle.net/10421/7478</ext-link></comment></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Lanthaler</surname><given-names>M</given-names> </name><name name-style="western"><surname>G&#x00FC;tl</surname><given-names>C</given-names> </name></person-group><article-title>On using JSON-LD to create evolvable RESTful services</article-title><year>2012</year><month>04</month><day>17</day><conf-name>WS-REST &#x2019;12: Proceedings of the Third International Workshop on RESTful Design</conf-name><conf-date>Apr 17, 2012</conf-date><conf-loc>Lyon, France</conf-loc><publisher-name>ACM</publisher-name><fpage>25</fpage><lpage>32</lpage><comment><ext-link ext-link-type="uri" xlink:href="https://dl.acm.org/doi/proceedings/10.1145/2307819">https://dl.acm.org/doi/proceedings/10.1145/2307819</ext-link></comment><pub-id pub-id-type="doi">10.1145/2307819.2307827</pub-id></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation 
citation-type="web"><article-title>Case definitions</article-title><source>Robert Koch Institut</source><access-date>2026-05-22</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.rki.de/DE/Themen/Infektionskrankheiten/Meldewesen/Falldefinitionen/falldefinitionen-node.html">https://www.rki.de/DE/Themen/Infektionskrankheiten/Meldewesen/Falldefinitionen/falldefinitionen-node.html</ext-link></comment></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="web"><article-title>Surveillance case definitions for current and historical conditions</article-title><source>Centers for Disease Control and Prevention</source><year>2025</year><access-date>2026-05-22</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://ndc.services.cdc.gov/">https://ndc.services.cdc.gov/</ext-link></comment></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="web"><article-title>Case definitions: nationally notifiable diseases</article-title><source>Government of Canada</source><year>2000</year><access-date>2026-06-10</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://diseases.canada.ca/notifiable/diseases-list">https://diseases.canada.ca/notifiable/diseases-list</ext-link></comment></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="web"><article-title>Collaboration is advancing science and practice of public health</article-title><source>Knowledge Repository</source><access-date>2026-05-22</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://knowledgerepository.syndromicsurveillance.org">https://knowledgerepository.syndromicsurveillance.org</ext-link></comment></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="web"><article-title>PubMed</article-title><source>US National Library of Medicine</source><access-date>2026-06-12</access-date><comment><ext-link ext-link-type="uri" 
xlink:href="https://pubmed.ncbi.nlm.nih.gov/">https://pubmed.ncbi.nlm.nih.gov/</ext-link></comment></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="report"><person-group person-group-type="author"><collab>OurResearch</collab></person-group><article-title>OpenAlex: a fully open catalog of the global research system</article-title><year>2025</year><access-date>2026-05-13</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://openalex.org/">https://openalex.org/</ext-link></comment></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="web"><article-title>Kaggle: your machine learning and data science community</article-title><source>Kaggle Inc</source><year>2025</year><access-date>2026-06-10</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.kaggle.com/">https://www.kaggle.com/</ext-link></comment></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="web"><article-title>Hugging Face &#x2013; the AI community building the future</article-title><source>Hugging Face</source><year>2025</year><access-date>2026-06-10</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://huggingface.co/">https://huggingface.co/</ext-link></comment></nlm-citation></ref><ref id="ref32"><label>32</label><nlm-citation citation-type="web"><article-title>Harvard Dataverse</article-title><source>Harvard University</source><year>2025</year><access-date>2026-06-10</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://data.harvard.edu/dataverse">https://data.harvard.edu/dataverse</ext-link></comment></nlm-citation></ref><ref id="ref33"><label>33</label><nlm-citation citation-type="web"><article-title>The fastest JSON schema Validator. 
Supports JSON Schema draft-04/06/07/2019-09/2020-12 and JSON Type Definition (RFC8927)</article-title><source>GitHub</source><access-date>2026-06-10</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://github.com/ajv-validator/ajv">https://github.com/ajv-validator/ajv</ext-link></comment></nlm-citation></ref><ref id="ref34"><label>34</label><nlm-citation citation-type="web"><article-title>Outbreak toolkit</article-title><source>World Health Organization</source><access-date>2026-05-22</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.who.int/emergencies/outbreak-toolkit">https://www.who.int/emergencies/outbreak-toolkit</ext-link></comment></nlm-citation></ref><ref id="ref35"><label>35</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Newman-Griffis</surname><given-names>D</given-names> </name><name name-style="western"><surname>Divita</surname><given-names>G</given-names> </name><name name-style="western"><surname>Desmet</surname><given-names>B</given-names> </name><name name-style="western"><surname>Zirikly</surname><given-names>A</given-names> </name><name name-style="western"><surname>Ros&#x00E9;</surname><given-names>CP</given-names> </name><name name-style="western"><surname>Fosler-Lussier</surname><given-names>E</given-names> </name></person-group><article-title>Ambiguity in medical concept normalization: an analysis of types and coverage in electronic health record datasets</article-title><source>J Am Med Inform Assoc</source><year>2021</year><month>03</month><day>1</day><volume>28</volume><issue>3</issue><fpage>516</fpage><lpage>532</lpage><pub-id pub-id-type="doi">10.1093/jamia/ocaa269</pub-id><pub-id pub-id-type="medline">33319905</pub-id></nlm-citation></ref><ref id="ref36"><label>36</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Guha</surname><given-names>RV</given-names> </name><name name-style="western"><surname>Brickley</surname><given-names>D</given-names> </name><name name-style="western"><surname>Macbeth</surname><given-names>S</given-names> </name></person-group><article-title>Schema.org: evolution of structured data on the web</article-title><source>Commun ACM</source><year>2016</year><volume>59</volume><fpage>44</fpage><lpage>51</lpage><pub-id pub-id-type="doi">10.1145/2844544</pub-id></nlm-citation></ref><ref id="ref37"><label>37</label><nlm-citation citation-type="web"><person-group person-group-type="author"><name name-style="western"><surname>Gray</surname><given-names>AJ</given-names> </name><name name-style="western"><surname>Goble</surname><given-names>CA</given-names> </name><name name-style="western"><surname>Jimenez</surname><given-names>RC</given-names> </name><etal/></person-group><article-title>Bioschemas: from potato salad to protein annotation</article-title><year>2017</year><access-date>2026-06-10</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://doi.org/10.7490/f1000research.1114493.1">https://doi.org/10.7490/f1000research.1114493.1</ext-link></comment></nlm-citation></ref><ref id="ref38"><label>38</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>K&#x00F6;hler</surname><given-names>S</given-names> </name><name name-style="western"><surname>Gargano</surname><given-names>M</given-names> </name><name name-style="western"><surname>Matentzoglu</surname><given-names>N</given-names> </name><etal/></person-group><article-title>The Human Phenotype Ontology in 2021</article-title><source>Nucleic Acids Res</source><year>2021</year><month>01</month><day>8</day><volume>49</volume><issue>D1</issue><fpage>D1207</fpage><lpage>D1217</lpage><pub-id pub-id-type="doi">10.1093/nar/gkaa1043</pub-id><pub-id 
pub-id-type="medline">33264411</pub-id></nlm-citation></ref><ref id="ref39"><label>39</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Anjum</surname><given-names>F</given-names> </name><name name-style="western"><surname>Maqbool</surname><given-names>F</given-names> </name><name name-style="western"><surname>Razzaq</surname><given-names>MS</given-names> </name><etal/></person-group><article-title>Semantic web ontology for structured knowledge representation and clinical decision support in eye diseases</article-title><source>Sci Rep</source><year>2025</year><volume>15</volume><issue>1</issue><fpage>29986</fpage><pub-id pub-id-type="doi">10.1038/s41598-025-15885-x</pub-id></nlm-citation></ref><ref id="ref40"><label>40</label><nlm-citation citation-type="web"><article-title>Open Syndrome</article-title><source>GitHub</source><access-date>2026-05-23</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://github.com/OpenSyndrome">https://github.com/OpenSyndrome</ext-link></comment></nlm-citation></ref><ref id="ref41"><label>41</label><nlm-citation citation-type="web"><article-title>International statistical classification of diseases and related health problems (ICD)</article-title><source>World Health Organization</source><access-date>2026-05-22</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.who.int/standards/classifications/classification-of-diseases">https://www.who.int/standards/classifications/classification-of-diseases</ext-link></comment></nlm-citation></ref><ref id="ref42"><label>42</label><nlm-citation citation-type="web"><article-title>Overview of SNOMED CT</article-title><source>National Library of Medicine</source><access-date>2026-05-22</access-date><comment><ext-link ext-link-type="uri" 
xlink:href="https://www.nlm.nih.gov/healthit/snomedct/snomed_overview.html">https://www.nlm.nih.gov/healthit/snomedct/snomed_overview.html</ext-link></comment></nlm-citation></ref><ref id="ref43"><label>43</label><nlm-citation citation-type="web"><source>Ollama</source><year>2024</year><access-date>2025-04-21</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://ollama.com">https://ollama.com</ext-link></comment></nlm-citation></ref><ref id="ref44"><label>44</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Touvron</surname><given-names>H</given-names> </name><name name-style="western"><surname>Lavril</surname><given-names>T</given-names> </name><name name-style="western"><surname>Izacard</surname><given-names>G</given-names> </name><etal/></person-group><article-title>LLaMA: open and efficient foundation language models</article-title><source>arXiv</source><comment>Preprint posted online on Feb 27, 2023</comment><pub-id pub-id-type="doi">10.48550/arXiv.2302.13971</pub-id></nlm-citation></ref><ref id="ref45"><label>45</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Jiang</surname><given-names>AQ</given-names> </name><etal/></person-group><article-title>Mistral 7B</article-title><comment>Preprint posted online on Oct 10, 2023</comment><comment><ext-link ext-link-type="uri" xlink:href="https://arxiv.org/abs/2310.06825">https://arxiv.org/abs/2310.06825</ext-link></comment></nlm-citation></ref><ref id="ref46"><label>46</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Guo</surname><given-names>D</given-names> </name><name name-style="western"><surname>Yang</surname><given-names>D</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>H</given-names> </name><etal/></person-group><article-title>DeepSeek-R1 incentivizes 
reasoning in LLMs through reinforcement learning</article-title><source>Nature</source><year>2025</year><month>09</month><volume>645</volume><issue>8081</issue><fpage>633</fpage><lpage>638</lpage><pub-id pub-id-type="doi">10.1038/s41586-025-09422-z</pub-id><pub-id pub-id-type="medline">40962978</pub-id></nlm-citation></ref><ref id="ref47"><label>47</label><nlm-citation citation-type="web"><article-title>Medllama2</article-title><source>Ollama</source><access-date>2026-06-10</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://ollama.com/library/medllama2">https://ollama.com/library/medllama2</ext-link></comment></nlm-citation></ref><ref id="ref48"><label>48</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Hui</surname><given-names>B</given-names> </name><etal/></person-group><article-title>Qwen2.5-coder technical report</article-title><source>arXiv</source><comment>Preprint posted online on Sep 18, 2024</comment><pub-id pub-id-type="doi">10.48550/arXiv.2409.12186</pub-id></nlm-citation></ref><ref id="ref49"><label>49</label><nlm-citation citation-type="web"><person-group person-group-type="author"><name name-style="western"><surname>Ferreira</surname><given-names>APG</given-names> </name><name name-style="western"><surname>An&#x017E;el</surname><given-names>A</given-names> </name><name name-style="western"><surname>Marcilio de</surname><given-names>IO</given-names> </name></person-group><article-title>The open syndrome definition</article-title><source>GitHub</source><access-date>2026-05-23</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://github.com/OpenSyndrome/schema/">https://github.com/OpenSyndrome/schema/</ext-link></comment></nlm-citation></ref><ref id="ref50"><label>50</label><nlm-citation citation-type="web"><article-title>The world&#x2019;s fastest framework for building 
websites</article-title><source>Hugo</source><access-date>2026-05-22</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://gohugo.io">https://gohugo.io</ext-link></comment></nlm-citation></ref><ref id="ref51"><label>51</label><nlm-citation citation-type="web"><article-title>Markdown</article-title><source>Wikipedia</source><year>2025</year><access-date>2026-06-10</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://en.wikipedia.org/wiki/Markdown">https://en.wikipedia.org/wiki/Markdown</ext-link></comment></nlm-citation></ref><ref id="ref52"><label>52</label><nlm-citation citation-type="web"><article-title>The JavaScript library for bespoke data visualization</article-title><source>D3</source><access-date>2026-05-22</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://d3js.org">https://d3js.org</ext-link></comment></nlm-citation></ref><ref id="ref53"><label>53</label><nlm-citation citation-type="web"><article-title>Toolkit for investigation and response to food and waterborne disease outbreaks with an EU dimension</article-title><source>European Centre for Disease Prevention and Control</source><year>2012</year><access-date>2026-05-22</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.ecdc.europa.eu/en/publications-data/toolkit-investigation-and-response-food-and-waterborne-disease-outbreaks-eu">https://www.ecdc.europa.eu/en/publications-data/toolkit-investigation-and-response-food-and-waterborne-disease-outbreaks-eu</ext-link></comment></nlm-citation></ref><ref id="ref54"><label>54</label><nlm-citation citation-type="web"><article-title>Draft 2020-12</article-title><source>JSON Schema</source><access-date>2026-05-22</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://json-schema.org/draft/2020-12">https://json-schema.org/draft/2020-12</ext-link></comment></nlm-citation></ref><ref id="ref55"><label>55</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wilkinson</surname><given-names>MD</given-names> </name><name 
name-style="western"><surname>Dumontier</surname><given-names>M</given-names> </name><name name-style="western"><surname>Aalbersberg</surname><given-names>IJJ</given-names> </name><etal/></person-group><article-title>The FAIR Guiding Principles for scientific data management and stewardship</article-title><source>Sci Data</source><year>2016</year><month>03</month><day>15</day><volume>3</volume><issue>1</issue><fpage>160018</fpage><pub-id pub-id-type="doi">10.1038/sdata.2016.18</pub-id><pub-id pub-id-type="medline">26978244</pub-id></nlm-citation></ref><ref id="ref56"><label>56</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hattab</surname><given-names>G</given-names> </name><name name-style="western"><surname>Irrgang</surname><given-names>C</given-names> </name><name name-style="western"><surname>K&#x00F6;rber</surname><given-names>N</given-names> </name><name name-style="western"><surname>K&#x00FC;hnert</surname><given-names>D</given-names> </name><name name-style="western"><surname>Ladewig</surname><given-names>K</given-names> </name></person-group><article-title>The way forward to embrace artificial intelligence in public health</article-title><source>Am J Public Health</source><year>2025</year><month>02</month><volume>115</volume><issue>2</issue><fpage>123</fpage><lpage>128</lpage><pub-id pub-id-type="doi">10.2105/AJPH.2024.307888</pub-id><pub-id pub-id-type="medline">39571129</pub-id></nlm-citation></ref><ref id="ref57"><label>57</label><nlm-citation citation-type="web"><article-title>Live. 
data browser</article-title><source>Open Syndrome Initiative</source><access-date>2026-05-23</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://opensyndrome.org/live/">https://opensyndrome.org/live/</ext-link></comment></nlm-citation></ref><ref id="ref58"><label>58</label><nlm-citation citation-type="web"><article-title>Open Syndrome 0.3.0</article-title><source>PyPI</source><access-date>2026-05-23</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://pypi.org/project/opensyndrome/">https://pypi.org/project/opensyndrome/</ext-link></comment></nlm-citation></ref><ref id="ref59"><label>59</label><nlm-citation citation-type="web"><source>Open Syndrome Initiative</source><access-date>2025-05-23</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://opensyndrome.org">https://opensyndrome.org</ext-link></comment></nlm-citation></ref><ref id="ref60"><label>60</label><nlm-citation citation-type="web"><article-title>Open Syndrome dataset</article-title><source>Hugging Face</source><access-date>2026-05-23</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://huggingface.co/datasets/opensyndrome/case-definitions">https://huggingface.co/datasets/opensyndrome/case-definitions</ext-link></comment></nlm-citation></ref><ref id="ref61"><label>61</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wang</surname><given-names>L</given-names> </name><name name-style="western"><surname>Zipursky</surname><given-names>AR</given-names> </name><name name-style="western"><surname>Geva</surname><given-names>A</given-names> </name><name name-style="western"><surname>McMurry</surname><given-names>AJ</given-names> </name><name name-style="western"><surname>Mandl</surname><given-names>KD</given-names> </name><name name-style="western"><surname>Miller</surname><given-names>TA</given-names> </name></person-group><article-title>A computable case definition 
for patients with SARS-CoV2 testing that occurred outside the hospital</article-title><source>JAMIA Open</source><year>2023</year><month>10</month><volume>6</volume><issue>3</issue><fpage>ooad047</fpage><pub-id pub-id-type="doi">10.1093/jamiaopen/ooad047</pub-id><pub-id pub-id-type="medline">37425487</pub-id></nlm-citation></ref><ref id="ref62"><label>62</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Rado&#x00EF;</surname><given-names>L</given-names> </name><name name-style="western"><surname>Luce</surname><given-names>D</given-names> </name></person-group><article-title>A review of risk factors for oral cavity cancer: the importance of a standardized case definition</article-title><source>Community Dent Oral Epidemiol</source><year>2013</year><month>04</month><volume>41</volume><issue>2</issue><fpage>97</fpage><lpage>109</lpage><pub-id pub-id-type="doi">10.1111/j.1600-0528.2012.00710.x</pub-id><pub-id pub-id-type="medline">22882534</pub-id></nlm-citation></ref><ref id="ref63"><label>63</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Chan</surname><given-names>DJ</given-names> </name><name name-style="western"><surname>Donovan</surname><given-names>B</given-names> </name></person-group><article-title>What&#x2019;s in a word? 
case definitions in sexual health medicine</article-title><source>Int J STD AIDS</source><year>2005</year><month>02</month><volume>16</volume><issue>2</issue><fpage>91</fpage><lpage>94</lpage><pub-id pub-id-type="doi">10.1258/0956462053057657</pub-id><pub-id pub-id-type="medline">15807934</pub-id></nlm-citation></ref><ref id="ref64"><label>64</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Williamson</surname><given-names>T</given-names> </name><name name-style="western"><surname>Miyagishima</surname><given-names>RC</given-names> </name><name name-style="western"><surname>Derochie</surname><given-names>JD</given-names> </name><name name-style="western"><surname>Drummond</surname><given-names>N</given-names> </name></person-group><article-title>Manual review of electronic medical records as a reference standard for case definition development: a validation study</article-title><source>CMAJ Open</source><year>2017</year><month>12</month><day>11</day><volume>5</volume><issue>4</issue><fpage>E830</fpage><lpage>E833</lpage><pub-id pub-id-type="doi">10.9778/cmajo.20170077</pub-id><pub-id pub-id-type="medline">29242256</pub-id></nlm-citation></ref><ref id="ref65"><label>65</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Collin</surname><given-names>L</given-names> </name><name name-style="western"><surname>Reisner</surname><given-names>SL</given-names> </name><name name-style="western"><surname>Tangpricha</surname><given-names>V</given-names> </name><name name-style="western"><surname>Goodman</surname><given-names>M</given-names> </name></person-group><article-title>Prevalence of transgender depends on the &#x201C;Case&#x201D; definition: a systematic review</article-title><source>J Sex Med</source><year>2016</year><month>04</month><volume>13</volume><issue>4</issue><fpage>613</fpage><lpage>626</lpage><pub-id 
pub-id-type="doi">10.1016/j.jsxm.2016.02.001</pub-id><pub-id pub-id-type="medline">27045261</pub-id></nlm-citation></ref><ref id="ref66"><label>66</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lee</surname><given-names>J</given-names> </name><name name-style="western"><surname>Yoon</surname><given-names>W</given-names> </name><name name-style="western"><surname>Kim</surname><given-names>S</given-names> </name><etal/></person-group><article-title>BioBERT: a pre-trained biomedical language representation model for biomedical text mining</article-title><source>Bioinformatics</source><year>2020</year><month>02</month><day>15</day><volume>36</volume><issue>4</issue><fpage>1234</fpage><lpage>1240</lpage><pub-id pub-id-type="doi">10.1093/bioinformatics/btz682</pub-id><pub-id pub-id-type="medline">31501885</pub-id></nlm-citation></ref><ref id="ref67"><label>67</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Huang</surname><given-names>K</given-names> </name><name name-style="western"><surname>Altosaar</surname><given-names>J</given-names> </name><name name-style="western"><surname>Ranganath</surname><given-names>R</given-names> </name></person-group><article-title>ClinicalBERT: modeling clinical notes and predicting hospital readmission</article-title><source>arXiv</source><comment>Preprint posted online on Apr 10, 2019</comment><pub-id pub-id-type="doi">10.48550/arXiv.1904.05342</pub-id></nlm-citation></ref><ref id="ref68"><label>68</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Caufield</surname><given-names>JH</given-names> </name><name name-style="western"><surname>Hegde</surname><given-names>H</given-names> </name><name name-style="western"><surname>Emonet</surname><given-names>V</given-names> </name><etal/></person-group><article-title>Structured 
Prompt Interrogation and Recursive Extraction of Semantics (SPIRES): a method for populating knowledge bases using zero-shot learning</article-title><source>Bioinformatics</source><year>2024</year><month>03</month><day>4</day><volume>40</volume><issue>3</issue><fpage>btae104</fpage><pub-id pub-id-type="doi">10.1093/bioinformatics/btae104</pub-id></nlm-citation></ref><ref id="ref69"><label>69</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Beutel</surname><given-names>DJ</given-names> </name><etal/></person-group><article-title>Flower: a friendly federated learning research framework</article-title><source>arXiv</source><comment>Preprint posted online on Jul 28, 2020</comment><pub-id pub-id-type="doi">10.48550/arXiv.2007.14390</pub-id></nlm-citation></ref><ref id="ref70"><label>70</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Ziller</surname><given-names>A</given-names> </name><etal/></person-group><article-title>PySyft: a library for easy federated learning</article-title><source>Federated Learning Systems: Towards Next-Generation AI</source><year>2021</year><publisher-name>Springer</publisher-name><fpage>111</fpage><lpage>139</lpage><pub-id pub-id-type="doi">10.1007/978-3-030-70604-3_5</pub-id></nlm-citation></ref></ref-list></back></article>