<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMIR</journal-id>
      <journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id>
      <journal-title>Journal of Medical Internet Research</journal-title>
      <issn pub-type="epub">1438-8871</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v22i6e17457</article-id>
      <article-id pub-id-type="pmid">32501271</article-id>
      <article-id pub-id-type="doi">10.2196/17457</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Medical Mobile App Classification Using the National Institute for Health and Care Excellence Evidence Standards Framework for Digital Health Technologies: Interrater Reliability Study</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Eysenbach</surname>
            <given-names>Gunther</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Valentine</surname>
            <given-names>Lee</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Holl</surname>
            <given-names>Felix</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author">
          <name name-style="western">
            <surname>Nwe</surname>
            <given-names>Khine</given-names>
          </name>
          <degrees>BSc, MSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-3421-0164</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Larsen</surname>
            <given-names>Mark Erik</given-names>
          </name>
          <degrees>DPhil</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-0272-2053</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Nelissen</surname>
            <given-names>Natalie</given-names>
          </name>
          <degrees>MSc, PhD</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-2455-8143</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Wong</surname>
            <given-names>David Chi-Wai</given-names>
          </name>
          <degrees>DPhil, MEng</degrees>
          <xref rid="aff4" ref-type="aff">4</xref>
          <address>
            <institution>Centre for Health Informatics</institution>
            <institution>University of Manchester</institution>
            <addr-line>Vaughan House</addr-line>
            <addr-line>Portsmouth Street</addr-line>
            <addr-line>Manchester, M13 9GB</addr-line>
            <country>United Kingdom</country>
            <phone>44 1613069280</phone>
            <email>david.wong@manchester.ac.uk</email>
          </address>
          <xref rid="aff5" ref-type="aff">5</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-8117-9193</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Leeds Institute of Health Sciences</institution>
        <institution>University of Leeds</institution>
        <addr-line>Leeds</addr-line>
        <country>United Kingdom</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Black Dog Institute</institution>
        <institution>University of New South Wales</institution>
        <addr-line>Sydney</addr-line>
        <country>Australia</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Leeds Institute of Data Analytics</institution>
        <institution>University of Leeds</institution>
        <addr-line>Leeds</addr-line>
        <country>United Kingdom</country>
      </aff>
      <aff id="aff4">
        <label>4</label>
        <institution>Centre for Health Informatics</institution>
        <institution>University of Manchester</institution>
        <addr-line>Manchester</addr-line>
        <country>United Kingdom</country>
      </aff>
      <aff id="aff5">
        <label>5</label>
        <institution>Department of Computer Science</institution>
        <institution>University of Manchester</institution>
        <addr-line>Manchester</addr-line>
        <country>United Kingdom</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: David Chi-Wai Wong <email>david.wong@manchester.ac.uk</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>6</month>
        <year>2020</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>5</day>
        <month>6</month>
        <year>2020</year>
      </pub-date>
      <volume>22</volume>
      <issue>6</issue>
      <elocation-id>e17457</elocation-id>
      <history>
        <date date-type="received">
          <day>16</day>
          <month>12</month>
          <year>2019</year>
        </date>
        <date date-type="rev-request">
          <day>13</day>
          <month>2</month>
          <year>2020</year>
        </date>
        <date date-type="rev-recd">
          <day>25</day>
          <month>2</month>
          <year>2020</year>
        </date>
        <date date-type="accepted">
          <day>29</day>
          <month>2</month>
          <year>2020</year>
        </date>
      </history>
      <copyright-statement>©Khine Nwe, Mark Erik Larsen, Natalie Nelissen, David Chi-Wai Wong. Originally published in the Journal of Medical Internet Research (http://www.jmir.org), 05.06.2020.</copyright-statement>
      <copyright-year>2020</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research, is properly cited. The complete bibliographic information, a link to the original publication on http://www.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="http://www.jmir.org/2020/6/e17457/" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Clinical governance of medical mobile apps is challenging, and there is currently no standard method for assessing the quality of such apps. In 2018, the National Institute for Health and Care Excellence (NICE) developed a framework for assessing the required level of evidence for digital health technologies (DHTs), as determined by their clinical function. The framework can potentially be used to assess mobile apps, which are a subset of DHTs. To be used reliably in this context, the framework must allow unambiguous classification of an app’s clinical function.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>The objective of this study was to determine whether mobile health apps could be reliably classified using the NICE evidence standards framework for DHTs.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>We manually extracted app titles, screenshots, and content descriptions for all apps listed on the National Health Service (NHS) Apps Library website on July 12, 2019; none of the apps were downloaded. Using this information, 2 mobile health (mHealth) researchers independently classified each app to one of the 4 functional tiers (ie, 1, 2, 3a, and 3b) described in the NICE digital technologies evaluation framework. Coders also answered contextual questions from the framework to identify whether apps were deemed to be higher risk. Agreement between coders was assessed using Cohen κ statistic.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>In total, we assessed 76 apps from the NHS Apps Library. There was classification agreement for 42 apps. Of these, 0 apps were unanimously classified into Tier 1; 24, into Tier 2; 15, into Tier 3a; and 3, into Tier 3b. There was disagreement between coders in 34/76 cases (45%); interrater agreement was poor (Cohen κ=0.32, 95% CI 0.16-0.47). Further investigation of disagreements highlighted 5 main explanatory themes: apps that did not correspond to any tier, apps that corresponded to multiple tiers, ambiguous tier descriptions, ambiguous app descriptions, and coder error.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>The current iteration of the NICE evidence standards framework for DHTs did not allow mHealth researchers to consistently and unambiguously classify digital health mobile apps listed on the NHS Apps Library according to their functional tier.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>mHealth</kwd>
        <kwd>telehealth</kwd>
        <kwd>evaluation</kwd>
        <kwd>evidence</kwd>
        <kwd>interrater</kwd>
        <kwd>NHS Apps Library</kwd>
        <kwd>NICE</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>Mobile apps for use in health care have been proposed in a variety of settings, including telehealth for disease management and monitoring, diagnosis and triage, and medication prescription and reminders [<xref ref-type="bibr" rid="ref1">1</xref>-<xref ref-type="bibr" rid="ref3">3</xref>]. To date, the evidence for their effectiveness is varied [<xref ref-type="bibr" rid="ref4">4</xref>-<xref ref-type="bibr" rid="ref6">6</xref>] and, in general, published evidence is extremely sparse [<xref ref-type="bibr" rid="ref7">7</xref>].</p>
      <p>Thorough governance of medical apps is challenging [<xref ref-type="bibr" rid="ref8">8</xref>]. There is growing recognition that the required level of evidence for medical apps ought to differ depending on app function. For instance, Wyatt [<xref ref-type="bibr" rid="ref9">9</xref>] suggested that apps be categorized as low, medium, or high risk for purposes of evaluation. The UK National Institute for Health and Care Excellence (NICE) further addressed this issue through their recent evidence standards framework for evaluating digital health technologies (DHTs) [<xref ref-type="bibr" rid="ref10">10</xref>]. In this framework, digital technologies are categorized into one of 3 tier levels (with Tier 3 split into Tier 3a and 3b) according to their intended function. An additional set of questions assesses higher-risk technologies, complementing the tier levels.</p>
      <p>The framework also provides minimum and best-practice recommendations on the associated standard of evidence required for each tier. For instance, Tier 3a technologies should be supported, at a minimum, by relevant <italic>high-quality observational</italic> or <italic>quasi-experimental studies.</italic> Best practice would include a <italic>high-quality intervention study</italic>. The recommended evidence standards are cumulative, such that Tier 3 digital technologies should also meet the criteria for Tier 2 and Tier 1 technologies. For Tier 2, this includes, for instance, having evidence to show that any health information provided is valid, accurate, up-to-date, regularly audited, and sufficiently comprehensive. At the time of writing, the framework is in its second iteration.</p>
      <p>Although other frameworks exist and have been used for classifying mobile apps [<xref ref-type="bibr" rid="ref11">11</xref>], we solely examine the NICE framework here. This framework is of particular importance as its development was commissioned by the National Health Service (NHS) England and is therefore likely to become an influential standard. In addition, its recency means that it has thus far received little external validation.</p>
      <p>This study aims to evaluate the appropriateness and potential limitations of the functional classification guidance within the NICE framework as applied to trusted and safe mobile health (mHealth) apps. We will do this by assessing interrater agreement of functional tier classification for all apps curated on the NHS Apps Library. By examining cases in which reviewers disagreed, we will highlight ambiguities in the current classification guidance and discuss potential improvements.</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Search Strategy and Data Extraction</title>
        <p>All apps available on the NHS Apps Library [<xref ref-type="bibr" rid="ref12">12</xref>] on July 12, 2019, were included in the analysis. For each app, the app title, screenshots, and description were extracted manually from the NHS Apps Library website; none of the apps were downloaded. No apps were excluded.</p>
      </sec>
      <sec>
        <title>Classification</title>
        <p>Two coders independently classified all apps according to functional classification. The coders were a clinician with formal postgraduate training in health informatics (KN) and an academic with research expertise in mHealth (MEL).</p>
        <p>For each app, we recorded the main features as described on the NHS Apps Library, including any available screenshots. The coders assigned each app to a functional tier and noted whether the app should be considered for risk adjustment based on clinical context. Abridged information about each tier and criteria for determining risk-adjusted apps are shown in <xref ref-type="table" rid="table1">Tables 1</xref> and <xref ref-type="table" rid="table2">2</xref>. To guide our classification decisions, we used the (unabridged) evidence standards framework alongside the associated user guide [<xref ref-type="bibr" rid="ref13">13</xref>]. If an app could be assigned into multiple tiers, it was assigned to the highest relevant tier, as per the framework guidance. If apps met the criteria for both Tier 3a and Tier 3b, they were assigned to 3b.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Abridged definitions of digital health technology tiers from the National Institute for Health and Care Excellence (NICE) evidence standards evaluation framework [<xref ref-type="bibr" rid="ref10">10</xref>].</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="220"/>
            <col width="750"/>
            <thead>
              <tr valign="top">
                <td colspan="2">Tier and functional classification</td>
                <td>Description</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="3">
                  <bold>1</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>System service</td>
                <td>Improves system efficiency. Unlikely to have direct and measurable individual patient outcomes.</td>
              </tr>
              <tr valign="top">
                <td colspan="3">
                  <bold>2</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Information</td>
                <td>Provides information and resources to patients or the public.</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Simple monitoring</td>
                <td>Allows users to record health parameters to create health diaries.</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Communicate</td>
                <td>Allows two-way communication between users and professionals, carers, third-party organizations, or peers.</td>
              </tr>
              <tr valign="top">
                <td colspan="3">
                  <bold>3a</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Preventative behavior change</td>
                <td>Designed to change user behavior related to health issues with, for example, smoking, eating, alcohol, sexual health, sleeping, and exercise.</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Self-manage</td>
                <td>Aims to help people with a diagnosed condition to manage their health.</td>
              </tr>
              <tr valign="top">
                <td colspan="3">
                  <bold>3b</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Treat</td>
                <td>Provides treatment for a diagnosed condition (such as cognitive behavioral therapy for anxiety), or guides treatment decisions.</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Active monitoring</td>
                <td>Automatically records information and transmits the data to a professional, carer, or third-party organization, without any input from the user, to inform clinical management decisions.</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Calculate</td>
                <td>Tools that perform clinical calculations that are likely to affect clinical care decisions.</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Diagnose</td>
                <td>Uses data to diagnose a condition in a patient, or to guide a diagnostic decision made by a health care professional.</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Contextual questions to help identify higher-risk digital health technologies (DHTs), abridged from [<xref ref-type="bibr" rid="ref10">10</xref>].</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="500"/>
            <col width="500"/>
            <thead>
              <tr valign="top">
                <td>Question</td>
                <td>Risk adjustment</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>1. Are the intended users of the DHT considered to be in a potentially vulnerable group such as children or at-risk adults?</td>
                <td>The National Health Service (NHS) England defines an at-risk adult as an adult “who may be in need of community care services by reason of mental or other disability, age or illness; and who is or may be unable to take care of him or herself, or unable to protect him or herself against significant harm or exploitation.” If the DHT is intended to be used by people considered to be in a potentially vulnerable group, then a higher level of evidence may be needed, or relevant expert opinion on whether the needs of the users are being appropriately addressed.</td>
              </tr>
              <tr valign="top">
                <td>2. How serious could the consequences be to the user if the DHT failed to perform as described?</td>
                <td>A higher level of potential harm may indicate that the best-practice evidence standards should be used.</td>
              </tr>
              <tr valign="top">
                <td>3. Is the DHT intended to be used with regular support from a suitably qualified and experienced health or social care professional?</td>
                <td>DHTs that are intended to be used with support (ie, with regular support or guidance from a suitably qualified and experienced health or social care professional) could be considered to have lower risk than DHTs that are intended to be used by the patients on their own. This contextual question may require careful interpretation depending on the individual DHT as the involvement of a clinician may in itself indicate that the DHT presents a specific risk.</td>
              </tr>
              <tr valign="top">
                <td>4. Does the DHT include machine learning algorithms or artificial intelligence?</td>
                <td>Refer to the code of conduct for data-driven health and care technology for additional considerations when assessing DHTs that use artificial intelligence or machine learning.</td>
              </tr>
              <tr valign="top">
                <td>5. Is the financial or organizational risk of the DHT expected to be very high?</td>
                <td>DHTs with very high financial risk should be assessed using the best-practice standards to provide surety that the DHT represents good value. High organizational risks may include situations in which implementing the DHT would need complex changes in working practice or care pathways.</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
      <sec>
        <title>Analysis</title>
        <p>We used descriptive statistics to summarize the membership of each tier and the differences in classification between reviewers. We reported Cohen κ as an overall measurement of interrater agreement [<xref ref-type="bibr" rid="ref14">14</xref>]. In supplementary analysis, we reported interrater agreement for a subset of apps that had previously been classified by another independent team and published in the framework user guide. All analyses were conducted using MATLAB (version 18.1; MathWorks) [<xref ref-type="bibr" rid="ref15">15</xref>].</p>
        <p>We examined any apps with discrepancies in functional tier classification in greater detail. Using the publicly available descriptions of each app, we identified common themes that may have led to differences in classification.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Interrater Agreement Results</title>
        <p>In total, we assessed 76 apps from the NHS Apps Library. Overall, 0 apps were unanimously classified into Tier 1; 24/76 (32%), into Tier 2; 15/76 (20%), into Tier 3a; and 3/76 (4%), into Tier 3b. Full classification details for each app are presented in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p>
        <p><xref ref-type="table" rid="table3">Table 3</xref> shows the interrater agreement for each tier of app. There was disagreement in 34/76 cases (45%) and Cohen κ was 0.32 (95% CI 0.16-0.47), which is commonly considered to correspond to poor agreement [<xref ref-type="bibr" rid="ref14">14</xref>]. Of the 34 apps for which functional classification differed, 13 were due to discrepancies between apps classified in Tier 3a or 3b. The next largest group of discrepancies was with apps classified between Tiers 2 and 3a (n=11). Analysis of interrater agreement for a subset of apps previously reviewed by a third independent group is presented in <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref> (coder 1: κ=0.48, 95% CI 0.31-0.66; coder 2: κ=0.62, 95% CI 0.44-0.80). The apps for which the independent coders disagreed are also listed in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendices 1</xref> and <xref ref-type="supplementary-material" rid="app2">2</xref>.</p>
        <p>Coders also assessed whether an app should be considered for <italic>risk adjustment</italic> based on a set of contextual questions (<xref ref-type="table" rid="table2">Table 2</xref>). Of these, Questions 2, 4, and 5 were not possible to answer without wider knowledge of the source code (for Question 4) or how the app interacted with the wider health care system. A total of 9 apps were unanimously considered to warrant risk adjustment, and 63 for no adjustment. There were discrepancies for 4 apps, and overall agreement may be considered good (κ=0.79; 95% CI 0.59-0.99).</p>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Number of apps coded into each functional tier classification, for both coders.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="250"/>
            <col width="0"/>
            <col width="200"/>
            <col width="0"/>
            <col width="200"/>
            <col width="0"/>
            <col width="200"/>
            <col width="0"/>
            <col width="150"/>
            <col width="0"/>
            <thead>
              <tr valign="top">
                <td>Classification: coder 1</td>
                <td colspan="8">Classification: coder 2</td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">1</td>
                <td colspan="2">2</td>
                <td colspan="2">3a</td>
                <td colspan="2">3b</td>
                <td>
                  <break/>
                </td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="2">1</td>
                <td colspan="2">0</td>
                <td colspan="2">6</td>
                <td colspan="2">0</td>
                <td colspan="2">0</td>
              </tr>
              <tr valign="top">
                <td colspan="2">2</td>
                <td colspan="2">1</td>
                <td colspan="2">24</td>
                <td colspan="2">5</td>
                <td colspan="2">3</td>
              </tr>
              <tr valign="top">
                <td colspan="2">3a</td>
                <td colspan="2">0</td>
                <td colspan="2">6</td>
                <td colspan="2">15</td>
                <td colspan="2">1</td>
              </tr>
              <tr valign="top">
                <td colspan="2">3b</td>
                <td colspan="2">0</td>
                <td colspan="2">0</td>
                <td colspan="2">12</td>
                <td colspan="2">3</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
      <sec>
        <title>Analysis of Coder Discrepancies</title>
        <p>On further collaborative review, differences in tier classification could be attributed to 5 causes: (1) app function not listed within framework, (2) app function corresponded to multiple tiers, (3) ambiguous tier description, (4) ambiguous app description, and (5) human error.</p>
        <sec>
          <title>App Function Not Listed Within Framework</title>
          <p>This occurred when the intended function of the app, as described on the NHS Apps Library, did not correspond to any of the functions listed within the NICE framework. One example of this was <italic>MyChoicePad</italic>, an app that is designed to assist nonverbal communication via symbols and signs from the Makaton language. Although one coder categorized the app and function as Tier 2: Communicate, the app does not facilitate two-way communication, so it does not strictly meet the associated criteria. Similarly, the other coder categorized the app into Tier 1: System service, but it is not entirely clear whether the app is designed to improve system efficiency, or even what the system is in this case.</p>
        </sec>
        <sec>
          <title>App Function Corresponded to Multiple Tiers</title>
          <p>This occurred when a single function of an app corresponded to more than 1 tier within the NICE framework. In particular, we noted ambiguity around mental health apps. For example, mindfulness or principles from cognitive behavioral therapy may be classified as a self-management strategy to reduce feelings of anxiety (3a), or as a treatment for anxiety disorders (3b). In this situation, the tier classification depended on the clinical use case, rather than the app function, which was the same in both cases. We also noted ambiguity between lifestyle/well-being versus diagnosed conditions. For example, an app that had a function to provide advice on stress or anxiety reduction might be classified into Tier 2 if it provides generic information, but into 3a if <italic>stress</italic> were part of a diagnosed condition.</p>
        </sec>
        <sec>
          <title>Ambiguous Tier Description</title>
          <p>Some terms used to describe the tiers within the NICE framework were not clearly defined. One specific instance of ambiguity between Tiers 2 and 3b occurred for the <italic>engage warfarin self-care</italic> app. The app allows warfarin test results to be <italic>actively reviewed</italic> by a clinician, but it is unclear whether this constitutes Tier 3b’s <italic>active monitoring</italic>, as results must be manually transmitted by the user, or Tier 2’s <italic>simple monitoring</italic>, which allows users to record their health parameters.</p>
        </sec>
        <sec>
          <title>Ambiguous App Description</title>
          <p>In some instances, the information provided on the NHS Apps Library was insufficient to definitively categorize an app. For example, screenshots on the NHS Apps Library for the <italic>NHS App</italic> show that users can <italic>check symptoms,</italic> but this functionality was not mentioned elsewhere. The main description for the app stated that users can “find reliable NHS information on hundreds of conditions and treatments, and get immediate advice.” If the app merely catalogs searchable information on symptoms, it should be classed as Tier 2; however, if it provides a symptom checker algorithm that requires user input, and outputs relevant immediate advice, it is Tier 3b.</p>
        </sec>
        <sec>
          <title>Human Error</title>
          <p>This occurred when the reviewer failed to identify a relevant piece of information that would have influenced their tier categorization decision. This most commonly occurred when an app had multiple functions belonging to multiple functional tiers. Typically, the main stated function of the app belonged to the lower tier. For instance, the <italic>Healthera</italic> app is primarily designed for prescription management, a Tier 1 function. However, it also allows users to contact their pharmacist for clinical advice via the app, a Tier 2 function. Human errors accounted for 6 apps. In conjunction with the primary result, we can estimate an upper bound on the level of disagreement as 28/76 (37%; κ=0.44, 95% CI 0.30-0.60), when human error is removed.</p>
        </sec>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Results</title>
        <p>Our results show that, from their publicly available descriptions, only 42 of the 76 apps collated in the NHS Apps Library could be classified into a functional tier consistently by informed individuals. Interrater agreement between reviewers was poor (κ=0.32).</p>
        <p>Of the 34 apps where there was disagreement, there was a subset (28/34) for which the tier could not be agreed, even after consultation (ie, excluding human error). In these cases, disagreement was attributed to four broad categories: <italic>App function not listed within framework</italic>, <italic>App function corresponded to multiple tiers</italic>, <italic>Ambiguous tier description</italic>, and <italic>Ambiguous app description</italic>.</p>
        <p>There was good agreement in assessment of <italic>higher risk</italic> using the NICE framework’s contextual questions (κ=0.79). Despite this, we noted ambiguity in the risk adjustment questions. One example of ambiguity occurs for the assessment question: “Does the DHT include machine learning algorithms or artificial intelligence?” If yes, framework users are advised to refer to the Code of Conduct for data-driven health and care technology [<xref ref-type="bibr" rid="ref16">16</xref>].</p>
        <p>Whilst the framework defines both artificial intelligence and machine learning, the provided definitions are ambiguous. For instance, it states that “Machine learning is an application of artificial intelligence that provides systems the ability to automatically learn and improve from experience without being explicitly programmed.” This definition would include the vast majority of quantitative methods. For instance, linear regression is fully defined by the slope and offset parameters, as learned directly from a data set.</p>
        <p>The Code of Conduct further provides an external link to the definition from the AHSN Network AI Initiative [<xref ref-type="bibr" rid="ref17">17</xref>]. No definition is provided here; instead, readers are informed that “there is no single, universally agreed definition of AI.” The absence of any clear definition means that this risk adjustment question cannot be answered objectively.</p>
        <p>Poor interrater agreement of tier classification may be attributed to two potential causes. First, the publicly available information on the NHS Apps Library may be insufficient to determine the functional tier. If true, this would motivate tighter regulation of how apps are described on the Library to ensure that the intended medical condition and patient or user group is clear. Second, the DHT framework is not specific enough to classify some types of apps. Based on our thematic analysis, in which we showed examples of misclassified apps due to ambiguity in the framework, we contend that this second reason contributes significantly to the overall level of disagreement.</p>
        <p>A framework with inadequate specificity has implications for both developers and regulators. For developers wishing to bring products to market as soon as possible, opportunity for misclassification due to ambiguous tier criteria may result in more classifications to lower tiers, where the minimum standard of evidence is not as stringent. In particular, Tier 2 technologies require only information that would be commonly audited in standard software development, whereas Tier 3a and Tier 3b technologies specifically require formal studies that would likely require additional time and financial resource. This may lead to situations in which apps are regulated to a level of lower scrutiny than they ought to be, given their function.</p>
        <p>In addition to the main result, we observed that some apps were categorized consistently by reviewers, but the minimum suggested level of evidence did not seem to align with the level of potential clinical risk. One example of this was <italic>Cypher</italic>, which was classified in Tier 2 as it facilitates communication. Whereas other Tier 2 apps allowed communication with health professionals, this app facilitates anonymous communication with other users to allow “anyone who want to share their thoughts”. (We further note that as of October 17, 2019, Cypher App is not available for download on either the Google Play store or Apple App Store and that the developer website is nonfunctional. Persistence of apps is a known problem in digital health [<xref ref-type="bibr" rid="ref18">18</xref>].) The evidence framework directly addresses this use case by requiring peer-support apps to show evidence of appropriate safeguarding. However, internet communities, forums, and chat rooms [<xref ref-type="bibr" rid="ref19">19</xref>,<xref ref-type="bibr" rid="ref20">20</xref>] with similar peer-support functions have been shown to have potential for negative patient impact. Given that the potential harm aligns more closely with apps in Tier 3a or 3b, a higher standard of evidence may be appropriate for peer-support apps, despite the similarity in technical function to other communication apps.</p>
        <p>Within mHealth more widely, we can select examples in which the dissonance between functional tier and required evidence is even greater. For instance, consider an app that calculates BMI by requiring the user to type height and weight. This may be categorized into Tier 3b, as a tool that performs a clinical calculation that can affect clinical care decisions. The associated minimum standard of evidence in the NICE framework requires a high-quality intervention study—a level of scrutiny that ought not to be required for simple and well-established calculations. This does not preclude the need for careful technical evaluation; indeed, Huckvale et al [<xref ref-type="bibr" rid="ref21">21</xref>] have demonstrated how even simple clinical calculations in diabetes apps are often calculated or displayed incorrectly.</p>
        <p>More broadly, this indicates that clinical risk and technical risk are not necessarily the same. One possible option to enable better classification and more specific evaluation guidance may be to categorize apps by technical complexity as well as clinical function. The idea of separating clinical and technical evaluation has been raised previously. Lewis and Wyatt [<xref ref-type="bibr" rid="ref22">22</xref>] suggest evaluation could be based on the probability and severity of clinical harm, the complexity of the app, and additional contextual factors [<xref ref-type="bibr" rid="ref22">22</xref>]. Others have commented that evidence of clinical effectiveness for software should include indication of safety, and that this must include formal technical evaluation [<xref ref-type="bibr" rid="ref23">23</xref>].</p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>The NICE framework is designed for DHTs that are commissioned by the health system. In this case, we examined apps on the NHS Apps Library. According to the library’s HTML description metadata, these are “digital tools that have been assessed by the NHS as clinically safe and secure to use” [<xref ref-type="bibr" rid="ref12">12</xref>]. However, in most instances, the apps had not been specifically commissioned. Although the relevance of the framework for these apps may vary, multiple apps in the library (eg, GDm-Health, Health Help Now) have already been used as part of routine clinical care.</p>
        <p>The primary limitation of this work is that our classifications relied on the information presented by the NHS Apps Library; additionally, none of the apps were downloaded. As all products published in the Library met a set of internal standards, we believed, a priori, that written descriptions and screenshots should be sufficient to enable clear identification of all key functions (as this is key information for informing consumer app selection). Our results showed that this was not the case. Downloading each app would provide more comprehensive understanding of the key functions and may increase interrater agreement on app tier. We therefore recommend that future studies download and engage directly with the contents of mobile apps.</p>
        <p>Our study compares only two sets of raters, so the results might be unduly influenced by the poor performance of a single individual. We partly addressed this by comparing interrater reliability with a third set of reviewers (see <xref ref-type="supplementary-material" rid="app1">Multimedia Appendices 1</xref> and <xref ref-type="supplementary-material" rid="app2">2</xref>), for which similar results were obtained. These were not included in the main manuscript as reviews were conducted under different conditions; we do not know if apps were downloaded, nor the time at which their review took place.</p>
        <p>Finally, the NICE framework establishes a functional tier and provides guidance on required levels of evidence at each tier. In this work, we only examined the consistency of tier classification, and did not address whether apps within a tier met the evidence standards.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>The NICE evidence standards framework for evaluating digital technologies is a significant and timely step toward establishing appropriate levels of evidence for DHTs. Despite this, we have demonstrated that the current iteration of the framework did not allow mHealth researchers to consistently and unambiguously group a set of digital health mobile apps according to their functional tier. In users with limited experience of mHealth evaluation (eg, app developers), we postulate that this ambiguity may lead to higher levels of misclassification. One potential improvement would be to classify DHTs by their technical complexity in addition to clinical function.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Raw data used for analysis. Independent data were extracted from NICE case studies.</p>
        <media xlink:href="jmir_v22i6e17457_app1.docx" xlink:title="DOCX File , 20 KB"/>
      </supplementary-material>
      <supplementary-material id="app2">
        <label>Multimedia Appendix 2</label>
        <p>Inter-rater reliability crosstables for coders 1 and 2 vs the case study examples provided with the evidence standards framework.</p>
        <media xlink:href="jmir_v22i6e17457_app2.docx" xlink:title="DOCX File , 14 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">DHT</term>
          <def>
            <p>digital health technologies</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">NHS</term>
          <def>
            <p>National Health Service</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">NICE</term>
          <def>
            <p>National Institute for Health and Care Excellence</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>KN was supported by a Chevening Scholarship.</p>
    </ack>
    <fn-group>
      <fn fn-type="con">
        <p>All authors contributed to the design, analysis, and final manuscript writing. KN undertook the initial data extraction.</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hanlon</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Daines</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Campbell</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>McKinstry</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Weller</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Pinnock</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Telehealth interventions to support self-management of long-term conditions: a systematic metareview of diabetes, heart failure, asthma, chronic obstructive pulmonary disease, and cancer</article-title>
          <source>J Med Internet Res</source>
          <year>2017</year>
          <month>05</month>
          <day>17</day>
          <volume>19</volume>
          <issue>5</issue>
          <fpage>e172</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.jmir.org/2017/5/e172/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/jmir.6688</pub-id>
          <pub-id pub-id-type="medline">28526671</pub-id>
          <pub-id pub-id-type="pii">v19i5e172</pub-id>
          <pub-id pub-id-type="pmcid">PMC5451641</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Semigran</surname>
              <given-names>HL</given-names>
            </name>
            <name name-style="western">
              <surname>Linder</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Gidengil</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Mehrotra</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Evaluation of symptom checkers for self diagnosis and triage: audit study</article-title>
          <source>BMJ</source>
          <year>2015</year>
          <volume>351</volume>
          <fpage>h3480</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.bmj.com/cgi/pmidlookup?view=long&#38;pmid=26157077"/>
          </comment>
          <pub-id pub-id-type="medline">26157077</pub-id>
          <pub-id pub-id-type="pmcid">PMC4496786</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Santo</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Richtering</surname>
              <given-names>SS</given-names>
            </name>
            <name name-style="western">
              <surname>Chalmers</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Thiagalingam</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Chow</surname>
              <given-names>CK</given-names>
            </name>
            <name name-style="western">
              <surname>Redfern</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Mobile phone apps to improve medication adherence: a systematic stepwise process to identify high-quality apps</article-title>
          <source>JMIR Mhealth Uhealth</source>
          <year>2016</year>
          <month>12</month>
          <day>02</day>
          <volume>4</volume>
          <issue>4</issue>
          <fpage>e132</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://mhealth.jmir.org/2016/4/e132/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/mhealth.6742</pub-id>
          <pub-id pub-id-type="medline">27913373</pub-id>
          <pub-id pub-id-type="pii">v4i4e132</pub-id>
          <pub-id pub-id-type="pmcid">PMC5161780</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Whitehead</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Seaton</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>The effectiveness of self-management mobile phone and tablet apps in long-term condition management: a systematic review</article-title>
          <source>J Med Internet Res</source>
          <year>2016</year>
          <volume>18</volume>
          <issue>5</issue>
          <fpage>e97</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.jmir.org/2016/5/e97/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/jmir.4883</pub-id>
          <pub-id pub-id-type="medline">27185295</pub-id>
          <pub-id pub-id-type="pii">v18i5e97</pub-id>
          <pub-id pub-id-type="pmcid">PMC4886099</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fraser</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Coiera</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Wong</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Safety of patient-facing digital symptom checkers</article-title>
          <source>Lancet</source>
          <year>2018</year>
          <month>12</month>
          <day>24</day>
          <volume>392</volume>
          <issue>10161</issue>
          <fpage>2263</fpage>
          <lpage>2264</lpage>
          <pub-id pub-id-type="doi">10.1016/S0140-6736(18)32819-8</pub-id>
          <pub-id pub-id-type="medline">30413281</pub-id>
          <pub-id pub-id-type="pii">S0140-6736(18)32819-8</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Free</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Phillips</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Galli</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Watson</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Felix</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Edwards</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Patel</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Haines</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>The effectiveness of mobile-health technology-based health behaviour change or disease management interventions for health care consumers: a systematic review</article-title>
          <source>PLoS Med</source>
          <year>2013</year>
          <volume>10</volume>
          <issue>1</issue>
          <fpage>e1001362</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://dx.plos.org/10.1371/journal.pmed.1001362"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pmed.1001362</pub-id>
          <pub-id pub-id-type="medline">23349621</pub-id>
          <pub-id pub-id-type="pii">PMEDICINE-D-12-00520</pub-id>
          <pub-id pub-id-type="pmcid">PMC3548655</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Larsen</surname>
              <given-names>ME</given-names>
            </name>
            <name name-style="western">
              <surname>Huckvale</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Nicholas</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Torous</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Birrell</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Reda</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Using science to sell apps: evaluation of mental health app store quality claims</article-title>
          <source>NPJ Digit Med</source>
          <year>2019</year>
          <volume>2</volume>
          <fpage>18</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/31304366"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41746-019-0093-1</pub-id>
          <pub-id pub-id-type="medline">31304366</pub-id>
          <pub-id pub-id-type="pii">93</pub-id>
          <pub-id pub-id-type="pmcid">PMC6550255</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Magrabi</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Habli</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Sujan</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Wong</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Thimbleby</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Baker</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Coiera</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Why is it so difficult to govern mobile apps in healthcare?</article-title>
          <source>BMJ Health Care Inform</source>
          <year>2019</year>
          <month>11</month>
          <volume>26</volume>
          <issue>1</issue>
          <fpage>e100006</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://informatics.bmj.com/cgi/pmidlookup?view=long&#38;pmid=31744843"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/bmjhci-2019-100006</pub-id>
          <pub-id pub-id-type="medline">31744843</pub-id>
          <pub-id pub-id-type="pii">bmjhci-2019-100006</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wyatt</surname>
              <given-names>JC</given-names>
            </name>
          </person-group>
          <article-title>How can clinicians, specialty societies and others evaluate and improve the quality of apps for patient use?</article-title>
          <source>BMC Med</source>
          <year>2018</year>
          <month>12</month>
          <day>03</day>
          <volume>16</volume>
          <issue>1</issue>
          <fpage>225</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedicine.biomedcentral.com/articles/10.1186/s12916-018-1211-7"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12916-018-1211-7</pub-id>
          <pub-id pub-id-type="medline">30501638</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12916-018-1211-7</pub-id>
          <pub-id pub-id-type="pmcid">PMC6276222</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="web">
          <source>Evidence Standards Framework for Digital Health Technologies</source>
          <year>2019</year>
          <month>03</month>
          <access-date>2019-10-17</access-date>
          <publisher-loc>London</publisher-loc>
          <publisher-name>National Institute for Health and Care Excellence</publisher-name>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.nice.org.uk/Media/Default/About/what-we-do/our-programmes/evidence-standards-framework/digital-evidence-standards-framework.pdf">https://www.nice.org.uk/Media/Default/About/what-we-do/our-programmes/evidence-standards-framework/digital-evidence-standards-framework.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Olla</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Shimskey</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>mHealth taxonomy: a literature survey of mobile health applications</article-title>
          <source>Health Technol</source>
          <year>2015</year>
          <month>01</month>
          <day>30</day>
          <volume>4</volume>
          <issue>4</issue>
          <fpage>299</fpage>
          <lpage>308</lpage>
          <pub-id pub-id-type="doi">10.1007/s12553-014-0093-8</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <collab>NHS</collab>
          </person-group>
          <source>NHS Apps Library</source>
          <access-date>2019-10-17</access-date>
          <publisher-loc>London</publisher-loc>
          <publisher-name>NHS</publisher-name>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.nhs.uk/apps-library/">https://www.nhs.uk/apps-library/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <collab>National Institute for Health and Care Excellence</collab>
          </person-group>
          <source>Evidence Standards Framework for Digital Health Technologies: User Guide</source>
          <year>2019</year>
          <month>03</month>
          <access-date>2019-10-17</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.nice.org.uk/Media/Default/About/what-we-do/our-programmes/evidence-standards-framework/user-guide.pdf">https://www.nice.org.uk/Media/Default/About/what-we-do/our-programmes/evidence-standards-framework/user-guide.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fleiss</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Levin</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Paik</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>The measurement of interrater agreement</article-title>
          <source>Statistical Methods for Rates and Proportions. 3rd edition</source>
          <year>2003</year>
          <publisher-loc>New York</publisher-loc>
          <publisher-name>John Wiley &#38; Sons Inc</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <collab>MathWorks</collab>
          </person-group>
          <source>MATLAB and Statistics Toolbox Release</source>
          <year>2018</year>
          <publisher-loc>Natick, MA</publisher-loc>
          <publisher-name>MathWorks</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <collab>Department of Health &#38; Social Care</collab>
          </person-group>
          <source>Code of Conduct for Data-Driven Health and Care Technology (Updated 18 July 2019)</source>
          <access-date>2019-10-17</access-date>
          <publisher-loc>London</publisher-loc>
          <publisher-name>Department of Health &#38; Social Care</publisher-name>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.gov.uk/government/publications/code-of-conduct-for-data-driven-health-and-care-technology/initial-code-of-conduct-for-data-driven-health-and-care-technology">https://www.gov.uk/government/publications/code-of-conduct-for-data-driven-health-and-care-technology/initial-code-of-conduct-for-data-driven-health-and-care-technology</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <collab>The AHSN Network</collab>
          </person-group>
          <source>AI in Health and Care: What Do We Mean by AI in Health and Care</source>
          <access-date>2019-10-17</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://ai.ahsnnetwork.com/about/ai-in-health-and-care/">http://ai.ahsnnetwork.com/about/ai-in-health-and-care/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Larsen</surname>
              <given-names>ME</given-names>
            </name>
            <name name-style="western">
              <surname>Nicholas</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Christensen</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Quantifying app store dynamics: longitudinal tracking of mental health apps</article-title>
          <source>JMIR Mhealth Uhealth</source>
          <year>2016</year>
          <month>08</month>
          <day>09</day>
          <volume>4</volume>
          <issue>3</issue>
          <fpage>e96</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://mhealth.jmir.org/2016/3/e96/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/mhealth.6020</pub-id>
          <pub-id pub-id-type="medline">27507641</pub-id>
          <pub-id pub-id-type="pii">v4i3e96</pub-id>
          <pub-id pub-id-type="pmcid">PMC4995352</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Beebe</surname>
              <given-names>TJ</given-names>
            </name>
            <name name-style="western">
              <surname>Asche</surname>
              <given-names>SE</given-names>
            </name>
            <name name-style="western">
              <surname>Harrison</surname>
              <given-names>PA</given-names>
            </name>
            <name name-style="western">
              <surname>Quinlan</surname>
              <given-names>KB</given-names>
            </name>
          </person-group>
          <article-title>Heightened vulnerability and increased risk-taking among adolescent chat room users: results from a statewide school survey</article-title>
          <source>J Adolesc Health</source>
          <year>2004</year>
          <month>08</month>
          <volume>35</volume>
          <issue>2</issue>
          <fpage>116</fpage>
          <lpage>23</lpage>
          <pub-id pub-id-type="doi">10.1016/j.jadohealth.2003.09.012</pub-id>
          <pub-id pub-id-type="medline">15261640</pub-id>
          <pub-id pub-id-type="pii">S1054139X03005287</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Daine</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Hawton</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Singaravelu</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Stewart</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Simkin</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Montgomery</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>The power of the web: a systematic review of studies of the influence of the internet on self-harm and suicide in young people</article-title>
          <source>PLoS One</source>
          <year>2013</year>
          <volume>8</volume>
          <issue>10</issue>
          <fpage>e77555</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://dx.plos.org/10.1371/journal.pone.0077555"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0077555</pub-id>
          <pub-id pub-id-type="medline">24204868</pub-id>
          <pub-id pub-id-type="pii">PONE-D-13-14108</pub-id>
          <pub-id pub-id-type="pmcid">PMC3813687</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Huckvale</surname>
              <given-names>Kit</given-names>
            </name>
            <name name-style="western">
              <surname>Adomaviciute</surname>
              <given-names>Samanta</given-names>
            </name>
            <name name-style="western">
              <surname>Prieto</surname>
              <given-names>José Tomás</given-names>
            </name>
            <name name-style="western">
              <surname>Leow</surname>
              <given-names>Melvin Khee-Shing</given-names>
            </name>
            <name name-style="western">
              <surname>Car</surname>
              <given-names>Josip</given-names>
            </name>
          </person-group>
          <article-title>Smartphone apps for calculating insulin dose: a systematic assessment</article-title>
          <source>BMC Med</source>
          <year>2015</year>
          <month>05</month>
          <day>06</day>
          <volume>13</volume>
          <fpage>106</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedicine.biomedcentral.com/articles/10.1186/s12916-015-0314-7"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12916-015-0314-7</pub-id>
          <pub-id pub-id-type="medline">25943590</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12916-015-0314-7</pub-id>
          <pub-id pub-id-type="pmcid">PMC4433091</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lewis</surname>
              <given-names>TL</given-names>
            </name>
            <name name-style="western">
              <surname>Wyatt</surname>
              <given-names>JC</given-names>
            </name>
          </person-group>
          <article-title>mHealth and mobile medical Apps: a framework to assess risk and promote safer use</article-title>
          <source>J Med Internet Res</source>
          <year>2014</year>
          <month>09</month>
          <day>15</day>
          <volume>16</volume>
          <issue>9</issue>
          <fpage>e210</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.jmir.org/2014/9/e210/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/jmir.3133</pub-id>
          <pub-id pub-id-type="medline">25223398</pub-id>
          <pub-id pub-id-type="pii">v16i9e210</pub-id>
          <pub-id pub-id-type="pmcid">PMC4180335</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Thimbleby</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Lewis</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Williams</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Making healthcare safer by understanding, designing and buying better IT</article-title>
          <source>Clin Med (Lond)</source>
          <year>2015</year>
          <month>06</month>
          <volume>15</volume>
          <issue>3</issue>
          <fpage>258</fpage>
          <lpage>62</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/26031976"/>
          </comment>
          <pub-id pub-id-type="doi">10.7861/clinmedicine.15-3-258</pub-id>
          <pub-id pub-id-type="medline">26031976</pub-id>
          <pub-id pub-id-type="pii">15/3/258</pub-id>
          <pub-id pub-id-type="pmcid">PMC4953110</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
