<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMIR</journal-id>
      <journal-id journal-id-type="nlm-ta">J Med Internet Res</journal-id>
      <journal-title>Journal of Medical Internet Research</journal-title>
      <issn pub-type="epub">1438-8871</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v26i1e46455</article-id>
      <article-id pub-id-type="pmid">39163593</article-id>
      <article-id pub-id-type="doi">10.2196/46455</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Effective Privacy Protection Strategies for Pregnancy and Gestation Information From Electronic Medical Records: Retrospective Study in a National Health Care Data Network in China</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Mavragani</surname>
            <given-names>Amaryllis</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Chaichulee</surname>
            <given-names>Sitthichok</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Gasmi</surname>
            <given-names>Maha</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Shuang</surname>
            <given-names>Wang</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Yu</surname>
            <given-names>Fei</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author">
          <name name-style="western">
            <surname>Liu</surname>
            <given-names>Chao</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-8960-661X</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Jiao</surname>
            <given-names>Yuanshi</given-names>
          </name>
          <degrees>MPH</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-0993-3830</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Su</surname>
            <given-names>Licong</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-3085-909X</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Liu</surname>
            <given-names>Wenna</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0003-3343-1501</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Zhang</surname>
            <given-names>Haiping</given-names>
          </name>
          <degrees>BS</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0000-6128-9192</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Nie</surname>
            <given-names>Sheng</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-8267-7909</ext-link>
        </contrib>
        <contrib id="contrib7" contrib-type="author" corresp="yes" equal-contrib="yes">
          <name name-style="western">
            <surname>Gong</surname>
            <given-names>Mengchun</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <address>
            <institution>School of Biomedical Engineering, Guangdong Medical University</institution>
            <addr-line>No 2, Wenming East Road</addr-line>
            <addr-line>Xiashan District</addr-line>
            <addr-line>Zhanjiang, 524000</addr-line>
            <country>China</country>
            <phone>86 18611768672</phone>
            <email>gmc@nrdrs.org</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-8197-6643</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Digital Health China Technologies Co, Ltd</institution>
        <addr-line>Beijing</addr-line>
        <country>China</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Department of Nephrology</institution>
        <institution>Nanfang Hospital</institution>
        <institution>Southern Medical University</institution>
        <addr-line>Guangzhou</addr-line>
        <country>China</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>School of Biomedical Engineering, Guangdong Medical University</institution>
        <addr-line>Zhanjiang</addr-line>
        <country>China</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Mengchun Gong <email>gmc@nrdrs.org</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <year>2024</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>20</day>
        <month>8</month>
        <year>2024</year>
      </pub-date>
      <volume>26</volume>
      <elocation-id>e46455</elocation-id>
      <history>
        <date date-type="received">
          <day>2</day>
          <month>5</month>
          <year>2023</year>
        </date>
        <date date-type="rev-request">
          <day>13</day>
          <month>9</month>
          <year>2023</year>
        </date>
        <date date-type="rev-recd">
          <day>2</day>
          <month>1</month>
          <year>2024</year>
        </date>
        <date date-type="accepted">
          <day>22</day>
          <month>6</month>
          <year>2024</year>
        </date>
      </history>
      <copyright-statement>©Chao Liu, Yuanshi Jiao, Licong Su, Wenna Liu, Haiping Zhang, Sheng Nie, Mengchun Gong. Originally published in the Journal of Medical Internet Research (https://www.jmir.org), 20.08.2024.</copyright-statement>
      <copyright-year>2024</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in the Journal of Medical Internet Research (ISSN 1438-8871), is properly cited. The complete bibliographic information, a link to the original publication on https://www.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://www.jmir.org/2024/1/e46455" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Pregnancy and gestation information is routinely recorded in electronic medical record (EMR) systems across China in various data sets. The combination of data on the number of pregnancies and gestations can imply occurrences of abortions and other pregnancy-related issues, which is important for clinical decision-making and personal privacy protection. However, the distribution of this information inside EMR is variable due to inconsistent IT structures across different EMR systems. A large-scale quantitative evaluation of the potential exposure of this sensitive information has not been previously performed, even though ensuring the protection of personal information is a priority, as emphasized in Chinese laws and regulations.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>This study aims to perform the first nationwide quantitative analysis of the identification sites and exposure frequency of sensitive pregnancy and gestation information. The goal is to propose strategies for effective information extraction and privacy protection related to women’s health.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>This study was conducted in a national health care data network. Rule-based protocols for extracting pregnancy and gestation information were developed by a committee of experts. A total of 6 different sub–data sets of EMRs were used as schemas for data analysis and strategy proposal. The identification sites and frequencies of identification in different sub–data sets were calculated. Manual quality inspections of the extraction process were performed by 2 independent groups of reviewers on 1000 randomly selected records. Based on these statistics, strategies for effective information extraction and privacy protection were proposed.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>The data network covered hospitalized patients from 19 hospitals in 10 provinces of China, encompassing 15,245,055 patients over an 11-year period (January 1, 2010-December 31, 2020). Among women aged 14-50 years, 70% were randomly selected from each hospital, resulting in a total of 1,110,053 patients. Of these, 688,268 female patients with sensitive reproductive information were identified. The frequencies of identification were variable, with the marriage history in admission medical records being the most frequent at 63.24%. Notably, more than 50% of female patients were identified with pregnancy and gestation history in nursing records, which is not generally considered a sub–data set rich in reproductive information. During the manual curation and review process, 1000 cases were randomly selected, and the precision and recall rates of the information extraction method both exceeded 99.5%. The privacy-protection strategies were designed with clear technical directions.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>Significant amounts of critical information related to women’s health are recorded in Chinese routine EMR systems and are distributed in various parts of the records with different frequencies. This requires a comprehensive protocol for extracting and protecting the information, which has been demonstrated to be technically feasible. Implementing a data-based strategy will enhance the protection of women’s privacy and improve the accessibility of health care services.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>pregnancy</kwd>
        <kwd>electronic medical record</kwd>
        <kwd>privacy protection</kwd>
        <kwd>risk stratification</kwd>
        <kwd>rule-based</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>Medical information is generally considered to be highly sensitive for individuals, and any breach of privacy can cause direct or indirect harm to patients [<xref ref-type="bibr" rid="ref1">1</xref>]. For female patients, pregnancy and gestation information is not only highly private but also implies the incidence of abortion, which is extremely controversial in terms of the rights and responsibilities of women in some jurisdictions [<xref ref-type="bibr" rid="ref2">2</xref>-<xref ref-type="bibr" rid="ref4">4</xref>]. Evidence suggests that the leakage of such information can negatively impact the attitudes of patients’ social environment and even health care providers [<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref5">5</xref>].</p>
      <p>The worldwide implementation of electronic medical records (EMRs) has significantly improved patient care by making health information readily accessible to a wide range of data producers. From 2007 to 2018, the average adoption rates of EMR increased from 18.6% to 85.3% [<xref ref-type="bibr" rid="ref6">6</xref>]. This rapid growth has led to the processing and storage of various categories of patient information, including demographics, medications, laboratory tests, and diagnostic records, thereby establishing EMR as a valuable resource for large-scale data analysis of real-world data. However, the unprecedented use of EMR posed new challenges for protecting patient information effectively and preventing the unnecessary exposure of sensitive data during real-world evidence (RWE) research. Consequently, there is growing attention to the legal and technical research on extracting pregnancy and gestation information and the relevant privacy protection strategies [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref8">8</xref>].</p>
      <p>On March 26, 2021, the Binhai Procuratorate accepted and examined a case of infringement of citizens’ personal information. Staff responsible for preventive health care at a town-central health center in Binhai County, Jiangsu province, took advantage of their positions to illegally obtain the family contact information and home addresses of pregnant women and newborns, totaling 25,124 items. This information was then resold through digital platforms, resulting in an illegal profit of US $4566 and subjecting pregnant women to telephone harassment. In response to this phenomenon, starting in 2022, local authorities began conducting annual comprehensive inspections of the supervision of fertility information and specifically informed the procuratorial organs of the inspection results [<xref ref-type="bibr" rid="ref9">9</xref>]. New laws and regulations have also been introduced, such as the “Guangdong Province Maternal and Child Health Care Management Regulations,” which came into effect on June 1, 2023. These regulations emphasize the confidentiality of personal information and privacy in maternal and child health care services and related supervision and management [<xref ref-type="bibr" rid="ref10">10</xref>].</p>
      <p>According to the “Technical Specifications for Hospital Information Platforms Based on EMR” issued by the National Health Commission of China in 2014, different health institutions in the country share a similar EMR framework comprising several sub–data sets including diagnostic information, medical advice, laboratory test results, examination information, and surgical records [<xref ref-type="bibr" rid="ref11">11</xref>]. However, issues of discontinuity and incompleteness in EMR writing pose significant challenges in multicenter data integration [<xref ref-type="bibr" rid="ref12">12</xref>]. Traditional information extraction and privacy protection strategies during RWE research and clinical data transfer have primarily focused on fixed sub–data sets, such as marriage and childbearing history, and direct data entities like the number of pregnancies in patients’ EMRs. These approaches, known as fixed site recognition strategies, lead to biased patient inclusion and flawed data masking in RWE research. For pregnancy and gestation information, testing results and procedures can indicate pregnancy status and gestation incidence without explicit descriptions in diagnostic sheets. For instance, a surgical history of pregnancy termination can imply suction aspiration abortion, while pregnancy history can be inferred from clinical test results such as human chorionic gonadotropin (HCG) levels exceeding 10 ng/L or 25 IU/L [<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref14">14</xref>].</p>
      <p>This study aims to propose protocols for the accurate and automatic extraction of pregnancy and gestation information from Chinese EMRs at the highest possible level of precision. Such information is crucial for patient inclusion and cohort identification in RWE studies to improve pregnancy outcomes [<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref16">16</xref>]. Additionally, privacy protection strategies will be developed to maximize the masking of pregnancy data and identify the risk of privacy leakage for different sub–data sets within EMRs. To the best of our knowledge, this study is the first to identify the frequency of privacy information in Chinese EMRs. Then, the related risks can be considered when using patients’ EMRs for RWE research.</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Data Source</title>
        <p>This retrospective study uses the Chinese Renal Disease Data System (CRDS) database, a comprehensive national EMR database. The CRDS includes data from 19 tertiary referral hospitals across 10 provinces, representing the 5 geographical regions of China (North, Central, East, South, and Southwestern). Each hospital’s database covers the EMRs of all patients who visited from the start of 2010 to the end of 2020. The patient’s EMRs were not specially selected. Complete EMRs from each hospital were transferred to the central database located at Nanfang Hospital of Southern Medical University in Guangzhou. In this study, the total number of patients in the database is 15,245,055. All analyzed hospitalization records were structured based on the CRDS data model [<xref ref-type="bibr" rid="ref17">17</xref>].</p>
      </sec>
      <sec>
        <title>Sample Patient Inclusion</title>
        <p>In this study, female patients aged 14-50 years from January 1, 2010, to December 31, 2020, were selected from the CRDS database. The statistical time here was the patient’s last visit information (including all the previous visit history), and 70% (n=1,110,053) were randomly selected for statistical analysis.</p>
      </sec>
      <sec>
        <title>Extraction of Pregnancy and Gestation Information From Chinese EMRs</title>
        <p>Following a preliminary investigation of Chinese EMRs, and incorporating expert guidance, teaching materials, guidelines, and literature, the research team developed the Extraction Protocol of Pregnancy and Gestation Information (EPPGI). This protocol was refined through repeated sorting, adjustment, and verification, considering the writing characteristics of various hospital medical records. Traditional methods typically extract patient data using diagnosis codes from the diagnostic sheets of Chinese EMRs. However, we first developed identification rules for test and exam results, covering patients with positive HCG results in different units of measurement and pregnancy tests.</p>
        <p>Given the diversity and complexity of the medical coding system in Chinese EMRs, we used regular expressions (regex) to retrieve pregnancy and gestation information across entire EMRs rather than relying solely on diagnosis codes in specific sub–data sets. The adopted regex extended beyond diagnoses to include surgical procedures, chief complaints related to pregnancy status, and gestation histories. Besides, regex for medications related to inducing labor or miscarriage was used to assist in identifying pregnancy information. Other regex, including description of fetus and exclusion rules, was also applied. All regex search patterns were the product of expert meetings and discussions. The detailed rules and regex of EPPGI are listed in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p>
        <p>To implement this approach, we used R software (version 4.2.2; R Core Team) to extract females with reproductive activities (FRA) information from the checklist using regular expressions. In the following example, “final_medtech” represents the checklist, and “TECHNOLOGY_RESULT” is the field containing the check result in the checklist.</p>
        <disp-formula>
          <graphic xlink:href="jmir_v26i1e46455_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </disp-formula>
        <p>The rules of regex allowed us to describe the proportion of patients with a pregnancy history across different sub–data sets of EMRs and the frequency of pregnancy and gestation information. After removing duplicate patients from different sub–data sets, we retrieved data on pregnant women in the selected EMRs using EPPGI.</p>
      </sec>
      <sec>
        <title>Privacy Protection Strategies</title>
        <p>Based on the statistics of the located information, we proposed privacy protection strategies to avoid unnecessary and unintentional exposure of pregnancy and gestation information in real-world data analytics. Due to the different writing styles in medical records, insufficient desensitization may not fully cover sensitive patient information, while excessive desensitization may obscure other relevant information. First, EPPGI was used to identify keywords of sensitive reproductive information (SRI), such as “助产&#124;难产&#124;平产&#124;早产&#124;死产&#124;死胎” (“midwifery&#124;dystocia&#124;normal birth&#124;preterm birth&#124;stillbirth&#124;fetal death”). With expert guidance, we finally chose to replace 15 characters before and after these keywords with asterisks (*) to desensitize sensitive information related to pregnancy and childbirth, thereby protecting patient privacy. This approach minimizes the possibility of inferring patients’ SRI from EMRs.</p>
        <p>In cases where the use of maternity-related information is unavoidable, the frequency of patient identification and privacy information was used to estimate the risk of unnecessary privacy exposure methodically. We also used diagnosis and marital history as criteria to locate maternity information and compared these results across 6 large sub–data sets of EMRs. A total of 2 independent reviewers (WL and HZ) inspected both methods to ensure accuracy and reliability.</p>
      </sec>
      <sec>
        <title>Manual Curation and Verification</title>
        <p>Afterward, included cases were randomly selected and manually reviewed by 2 independent groups of reviewers (CL, YJ, LS, WL, HZ, SN, and MG) to test the precision and recall of the data extraction. For the EPPGI, 1000 female cases were randomly assigned to 2 external experts (Aixin Guo and Wenna Liu) to manually extract SRI. The manually extracted results were then compared with the EPPGI results to evaluate the precision and recall rate, as defined below. We also compared the precision and recall rates of the EPPGI with those obtained using only maternal and diagnostic history.</p>
        <p>Additionally, the reviewers attempted to identify FRA in privacy-concealed data sets to test the success rate of the privacy protection strategies, as defined in <xref rid="figure1" ref-type="fig">Figure 1</xref>.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Formulas for precision, recall, and success rate. Precision is the ratio of correctly predicted positive observations to the total predicted positives, focusing on the accuracy of the positive predictions. Recall is the ratio of correctly predicted positive observations to all the actual positives, focusing on the ability to capture all actual positive cases. EMR: electronic medical record; EPPGI: Extraction Protocol of Pregnancy and Gestation Information; FRA: females with reproductive activities.</p>
          </caption>
          <graphic xlink:href="jmir_v26i1e46455_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Ethical Considerations</title>
        <p>This study was approved by the Medical Ethics Committee of Nanfang Hospital, Southern Medical University (approval NFEC-2019-213), which waived the requirement for patient-informed consent due to the retrospective nature of the study. This study was also approved by the China Office of Human Genetic Resources for Data Preservation Application (approval 2021-BC0037). This study complied with the Declaration of Helsinki and the STROBE (Strengthening the Reporting of Observational Studies in Epidemiology) statement.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <p>To the best of our knowledge, this study is the first to identify the frequency of privacy information in Chinese EMRs.</p>
      <sec>
        <title>General Information of EMRs</title>
        <p>All patient data were extracted from the CRDS database, a real-world database that includes records from 19 hospitals. Based on the inclusion criteria (female patients aged 14-50 years from January 1, 2010, to December 31, 2020) and a 70% entry ratio, a total of 1,110,053 patients were selected as the EMR sample. It is worth noting that removing duplicates reduced the sample size from 2,377,582 to 1,585,801, which is due to multiple diagnostic records for individual patients. The admittance flowchart is shown in <xref rid="figure2" ref-type="fig">Figure 2</xref>, and detailed information is displayed in <xref ref-type="table" rid="table1">Table 1</xref>. According to Chinese national specifications for standard EMR structure, EMRs consist of similar sub–data sets with minor differences in nomenclature including doctor’s orders, diagnostic tables, test sheets, examination sheets, surgical sheets, and medical record texts. The medical record texts are further divided into 10 parts: course records, admission records, discharge records, referral records, consultation records, nursing records, death records, surgical notes, informed consent forms, and others. In CRDS, the admission record texts have been preprocessed using natural language processing for allergic history, chief complaint, disease history, tobacco and alcohol history, family history, marriage history, surgical history, and toxic exposure history. The general structure of Chinese EMRs is demonstrated in <xref rid="figure3" ref-type="fig">Figure 3</xref>.</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Admittance flowchart. The initial database had 15,245,055 patients. After excluding male patients, specific years, teenagers, and older adults, removing duplicate patient records, and applying a 70% entry ratio, the final sample consisted of 1,110,053 observations. CRDS: Chinese Renal Disease Data System.</p>
          </caption>
          <graphic xlink:href="jmir_v26i1e46455_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>General information on EMRs<sup>a</sup> from 19 hospitals.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="180"/>
            <col width="310"/>
            <col width="170"/>
            <col width="170"/>
            <col width="170"/>
            <thead>
              <tr valign="top">
                <td>Hospital number</td>
                <td>City and area</td>
                <td>Total bed numbers</td>
                <td>FRA<sup>b</sup> (n=688,268)</td>
                <td>Total patients (n=1,110,053)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>1</td>
                <td>Guangzhou, Southern</td>
                <td>2225</td>
                <td>57,837</td>
                <td>102,483</td>
              </tr>
              <tr valign="top">
                <td>2</td>
                <td>Beijing, Northern</td>
                <td>1650</td>
                <td>30,410</td>
                <td>36,757</td>
              </tr>
              <tr valign="top">
                <td>3</td>
                <td>Jinan, Northern</td>
                <td>4000</td>
                <td>79,294</td>
                <td>94,339</td>
              </tr>
              <tr valign="top">
                <td>4</td>
                <td>Hangzhou, Eastern</td>
                <td>3200</td>
                <td>30,950</td>
                <td>70,523</td>
              </tr>
              <tr valign="top">
                <td>5</td>
                <td>Hangzhou, Eastern</td>
                <td>2400</td>
                <td>67,086</td>
                <td>82,977</td>
              </tr>
              <tr valign="top">
                <td>6</td>
                <td>Guangzhou, Southern</td>
                <td>3000</td>
                <td>53,355</td>
                <td>82,654</td>
              </tr>
              <tr valign="top">
                <td>7</td>
                <td>Shenzhen, Southern</td>
                <td>2000</td>
                <td>41,352</td>
                <td>48,269</td>
              </tr>
              <tr valign="top">
                <td>8</td>
                <td>Nanjing, Eastern</td>
                <td>2499</td>
                <td>33,709</td>
                <td>51,919</td>
              </tr>
              <tr valign="top">
                <td>9</td>
                <td>Shanghai, Eastern</td>
                <td>800</td>
                <td>499</td>
                <td>743</td>
              </tr>
              <tr valign="top">
                <td>10</td>
                <td>Chengdu, Southwestern</td>
                <td>1000</td>
                <td>21,803</td>
                <td>78,843</td>
              </tr>
              <tr valign="top">
                <td>11</td>
                <td>Hefei, Eastern</td>
                <td>3138</td>
                <td>61,044</td>
                <td>103,203</td>
              </tr>
              <tr valign="top">
                <td>12</td>
                <td>Wuhan, Central</td>
                <td>5613</td>
                <td>2555</td>
                <td>4858</td>
              </tr>
              <tr valign="top">
                <td>13</td>
                <td>Maoming, Southern</td>
                <td>2500</td>
                <td>64,299</td>
                <td>81,673</td>
              </tr>
              <tr valign="top">
                <td>14</td>
                <td>Guangzhou, Southern</td>
                <td>2247</td>
                <td>22,406</td>
                <td>54,663</td>
              </tr>
              <tr valign="top">
                <td>15</td>
                <td>Huizhou, Southern</td>
                <td>2156</td>
                <td>22,756</td>
                <td>23,181</td>
              </tr>
              <tr valign="top">
                <td>16</td>
                <td>Guiyang, Southwestern</td>
                <td>2000</td>
                <td>166</td>
                <td>6138</td>
              </tr>
              <tr valign="top">
                <td>17</td>
                <td>Foshan, Southern</td>
                <td>2200</td>
                <td>63,336</td>
                <td>125,085</td>
              </tr>
              <tr valign="top">
                <td>18</td>
                <td>Guangzhou, Southern</td>
                <td>3000</td>
                <td>6358</td>
                <td>20,100</td>
              </tr>
              <tr valign="top">
                <td>19</td>
                <td>Guangzhou, Southern</td>
                <td>1000</td>
                <td>29,053</td>
                <td>41,645</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>EMR: electronic medical record.</p>
            </fn>
            <fn id="table1fn2">
              <p><sup>b</sup>FRA: females with reproductive activities.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>Structure of Chinese EMRs. The EMRs consist of medical records, surgery, examination results, test results, medication, and diagnosis. CPOE: computerized physician order entry; CRDS: Chinese Renal Disease Data System; EMR: electronic medical record; HIS: hospital information system; LIS: laboratory information system; NLP: natural language processing.</p>
          </caption>
          <graphic xlink:href="jmir_v26i1e46455_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Number of FRA</title>
        <p>After the initial investigation, we applied the EPPGI to a sample of 1,110,053 female patients of childbearing age. This analysis covered 6 different categories of EMRs, with each sub–data set and its components processed separately. <xref ref-type="table" rid="table2">Table 2</xref> presents the total number of patients, the identified number of FRA, and their corresponding proportions.</p>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Patients identified by EPPGI<sup>a</sup>. From left to right, the columns display the total number of patients, the number of identified FRA<sup>b</sup>, and their corresponding proportions.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="470"/>
            <col width="200"/>
            <col width="150"/>
            <col width="0"/>
            <col width="150"/>
            <thead>
              <tr valign="top">
                <td colspan="2">EMR<sup>c</sup> sub–data sets</td>
                <td>Patient number (per person)</td>
                <td colspan="2">Maternal patient number</td>
                <td>Percentage (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="2">Order</td>
                <td>955,140</td>
                <td colspan="2">146,555</td>
                <td>15.34</td>
              </tr>
              <tr valign="top">
                <td colspan="2">Diagnosis from the frontage</td>
                <td>1,073,167</td>
                <td colspan="2">312,008</td>
                <td>29.07</td>
              </tr>
              <tr valign="top">
                <td colspan="2">Laboratory report</td>
                <td>903,987</td>
                <td colspan="2">93,386</td>
                <td>10.33</td>
              </tr>
              <tr valign="top">
                <td colspan="2">Examine result</td>
                <td>852,143</td>
                <td colspan="2">172,735</td>
                <td>20.27</td>
              </tr>
              <tr valign="top">
                <td colspan="2">Prescription of surgical procedures in HIS<sup>d</sup>/CPOE<sup>e</sup> system</td>
                <td>767,693</td>
                <td colspan="2">157,027</td>
                <td>20.45</td>
              </tr>
              <tr valign="top">
                <td colspan="6">
                  <bold>Medical records</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Total</td>
                <td>588,963</td>
                <td>393,550</td>
                <td colspan="2">66.82</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Course records</td>
                <td>207,575</td>
                <td>95,012</td>
                <td colspan="2">45.77</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Discharge records</td>
                <td>330,909</td>
                <td>112,927</td>
                <td colspan="2">34.13</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Referral records</td>
                <td>9699</td>
                <td>2682</td>
                <td colspan="2">27.65</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Consultation records</td>
                <td>38,728</td>
                <td>12,639</td>
                <td colspan="2">32.64</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Nursing records</td>
                <td>192,080</td>
                <td>112,465</td>
                <td colspan="2">58.55</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Death records</td>
                <td>953</td>
                <td>105</td>
                <td colspan="2">11.02</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Surgical notes</td>
                <td>134,889</td>
                <td>43,280</td>
                <td colspan="2">32.09</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Informed consent</td>
                <td>238,014</td>
                <td>102,386</td>
                <td colspan="2">43.02</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Others</td>
                <td>411,637</td>
                <td>138,892</td>
                <td colspan="2">33.74</td>
              </tr>
              <tr valign="top">
                <td colspan="6">
                  <bold>Admission records (NLP<sup>f</sup>)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Total</td>
                <td>376,176</td>
                <td>317,962</td>
                <td colspan="2">84.52</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Allergic history</td>
                <td>446,360</td>
                <td>0</td>
                <td colspan="2">0.00</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Chief complaint</td>
                <td>13,925</td>
                <td>55</td>
                <td colspan="2">0.39</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Disease history</td>
                <td>446,390</td>
                <td>11,312</td>
                <td colspan="2">2.53</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Tobacco and alcohol history</td>
                <td>490,028</td>
                <td>0</td>
                <td colspan="2">0.00</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Family history</td>
                <td>464,516</td>
                <td>6</td>
                <td colspan="2">0.00</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Marriage history</td>
                <td>467,184</td>
                <td>295,436</td>
                <td colspan="2">63.24</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Surgical history</td>
                <td>316,282</td>
                <td>74,118</td>
                <td colspan="2">23.43</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Toxic exposure history</td>
                <td>504,022</td>
                <td>4</td>
                <td colspan="2">0.00</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table2fn1">
              <p><sup>a</sup>EPPGI: Extraction Protocol of Pregnancy and Gestation Information.</p>
            </fn>
            <fn id="table2fn2">
              <p><sup>b</sup>FRA: females with reproductive activities.</p>
            </fn>
            <fn id="table2fn3">
              <p><sup>c</sup>EMR: electronic medical record.</p>
            </fn>
            <fn id="table2fn4">
              <p><sup>d</sup>HIS: hospital information system.</p>
            </fn>
            <fn id="table2fn5">
              <p><sup>e</sup>CPOE: computerized physician order entry.</p>
            </fn>
            <fn id="table2fn6">
              <p><sup>f</sup>NLP: natural language processing.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <p>The number of pregnancies identified solely by diagnosis was 312,008, accounting for 29.07% of the patients in the diagnostic sub–data set. The number of patients who were identified only by their marital and childbearing history was as high as 295,436, accounting for 26.61% of the total study population. The number of pregnancies identified by diagnosis and marital and childbearing history was 521,132, accounting for 46.95% of the total study population. When examination results and other content are used for identification in addition to diagnosis and marital history, the number of patients with identifiable maternity information rises to 688,268, accounting for 62% of the total study population.</p>
        <p>In the text of medical records, 393,550 patients with SRI were identified, accounting for 66.82% of 588,963 records. Due to the presence of childbearing history, which constitutes the leading source of SRI, over 80% (n=317,962) of female patients in admission records were identified as FRA by EPPGI. In addition, 58.55% (n=112,465) of female patients were identified in nursing records, making it the second highest proportion of FRA.</p>
        <p>Based on these results, EPPGI effectively extracts FRA from every sub–data set within Chinese EMRs.</p>
      </sec>
      <sec>
        <title>Frequency of Recognition</title>
        <p>A single patient can generate multiple encounter records in the EMR system per visit. Therefore, individual EMRs were divided into separate records based on visits, reflecting the actual EMR storage in RWE studies. <xref ref-type="table" rid="table3">Table 3</xref> presents the frequency of pregnancy information identification across different sub–data sets of Chinese EMRs. Similar to the results from per-patient records, SRI can be widely identified in each sub–data set of EMRs. SRI is primarily concentrated in diagnosis records, surgical records, and medical records text. In diagnosis records, SRI could be extracted from 15.06% of 15,497,063 records. In surgical records, SRI could be extracted from 11.49% of 1,604,579 records. The text of medical records showed the highest frequency of SRI identification, with an overall recognition rate of 29.92%. Additionally, it is noteworthy that more than 80% of admission records contained SRI.</p>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Frequency of pregnancy information identification. From left to right, the columns display the total number of records, the frequency of identified FRA<sup>a</sup>, and their corresponding proportions.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="450"/>
            <col width="0"/>
            <col width="210"/>
            <col width="0"/>
            <col width="130"/>
            <col width="0"/>
            <col width="180"/>
            <thead>
              <tr valign="top">
                <td colspan="3">EMR<sup>b</sup> sub–data sets</td>
                <td colspan="2">Record number (per visit)</td>
                <td colspan="2">Maternal record number</td>
                <td>Percentage (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="3">Order</td>
                <td colspan="2">93,182,790</td>
                <td colspan="2">384,699</td>
                <td>0.41</td>
              </tr>
              <tr valign="top">
                <td colspan="3">Diagnosis from the frontage</td>
                <td colspan="2">15,497,063</td>
                <td colspan="2">2,334,160</td>
                <td>15.06</td>
              </tr>
              <tr valign="top">
                <td colspan="3">Laboratory report</td>
                <td colspan="2">102,509,232</td>
                <td colspan="2">285,245</td>
                <td>0.28</td>
              </tr>
              <tr valign="top">
                <td colspan="3">Examine result</td>
                <td colspan="2">6,790,300</td>
                <td colspan="2">549,078</td>
                <td>8.09</td>
              </tr>
              <tr valign="top">
                <td colspan="3">Prescription of surgical procedures in HIS<sup>c</sup>/CPOE<sup>d</sup> system</td>
                <td colspan="2">1,604,579</td>
                <td colspan="2">184,335</td>
                <td>11.49</td>
              </tr>
              <tr valign="top">
                <td colspan="8">
                  <bold>Medical records</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Total</td>
                <td colspan="2">8,473,462</td>
                <td colspan="2">2,534,940</td>
                <td colspan="2">29.92</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Course records</td>
                <td colspan="2">2,132,926</td>
                <td colspan="2">527,915</td>
                <td colspan="2">24.75</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Discharge records</td>
                <td colspan="2">532,790</td>
                <td colspan="2">151,352</td>
                <td colspan="2">28.41</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Referral records</td>
                <td colspan="2">25,171</td>
                <td colspan="2">6737</td>
                <td colspan="2">26.76</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Consultation records</td>
                <td colspan="2">151,564</td>
                <td colspan="2">43,695</td>
                <td colspan="2">28.83</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Nursing records</td>
                <td colspan="2">965,042</td>
                <td colspan="2">268,586</td>
                <td colspan="2">27.83</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Death records</td>
                <td colspan="2">2250</td>
                <td colspan="2">166</td>
                <td colspan="2">7.38</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Surgical notes</td>
                <td colspan="2">482,578</td>
                <td colspan="2">106,656</td>
                <td colspan="2">22.10</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Informed consent</td>
                <td colspan="2">1,226,875</td>
                <td colspan="2">326,284</td>
                <td colspan="2">26.59</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Others</td>
                <td colspan="2">2,377,080</td>
                <td colspan="2">637,807</td>
                <td colspan="2">26.83</td>
              </tr>
              <tr valign="top">
                <td colspan="8">
                  <bold>Admission records (NLP</bold>
                  <sup>e</sup>
                  <bold>)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Total</td>
                <td colspan="2">577,186</td>
                <td colspan="2">465,742</td>
                <td colspan="2">80.69</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Allergic history</td>
                <td colspan="2">1,183,577</td>
                <td colspan="2">0</td>
                <td colspan="2">0.00</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Chief complaint</td>
                <td colspan="2">45,987</td>
                <td colspan="2">59</td>
                <td colspan="2">0.13</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Disease history</td>
                <td colspan="2">4,166,715</td>
                <td colspan="2">15,057</td>
                <td colspan="2">0.36</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Tobacco and alcohol history</td>
                <td colspan="2">858,066</td>
                <td colspan="2">0</td>
                <td colspan="2">0.00</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Family history</td>
                <td colspan="2">2,076,062</td>
                <td colspan="2">6</td>
                <td colspan="2">0.00</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Marriage history</td>
                <td colspan="2">708,544</td>
                <td colspan="2">444,559</td>
                <td colspan="2">62.74</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Surgical history</td>
                <td colspan="2">553,774</td>
                <td colspan="2">106,846</td>
                <td colspan="2">19.29</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Toxic exposure history</td>
                <td colspan="2">3,954,196</td>
                <td colspan="2">4</td>
                <td colspan="2">0.00</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table3fn1">
              <p><sup>a</sup>FRA: females with reproductive activities.</p>
            </fn>
            <fn id="table3fn2">
              <p><sup>b</sup>EMR: electronic medical record.</p>
            </fn>
            <fn id="table3fn3">
              <p><sup>c</sup>HIS: hospital information system.</p>
            </fn>
            <fn id="table3fn4">
              <p><sup>d</sup>CPOE: computerized physician order entry.</p>
            </fn>
            <fn id="table3fn5">
              <p><sup>e</sup>NLP: natural language processing.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Precision and Recall Rate</title>
        <p>During the manual curation and certification process, 1000 complete EMRs were randomly selected from the sample patients and reviewed by 2 independent medical experts (Aixin Guo and Wenna Liu) to determine maternal status. The precision and recall rates of the EPPGI were 100% and 99.68%, respectively. When only diagnosis history and marital history were used for identification, the precision rate remained 100%, but the recall rate dropped to 73.35%. For details, see <xref ref-type="table" rid="table4">Tables 4</xref> and <xref ref-type="table" rid="table5">5</xref>, where “0” represents patients without FRA information and “1” represents patients with FRA information.</p>
        <table-wrap position="float" id="table4">
          <label>Table 4</label>
          <caption>
            <p>Confusion matrix of EPPGI<sup>a</sup> method.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="320"/>
            <col width="450"/>
            <col width="230"/>
            <thead>
              <tr valign="top">
                <td>Prediction</td>
                <td colspan="2">Reference</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>0</td>
                <td>1</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>0</td>
                <td>377</td>
                <td>2</td>
              </tr>
              <tr valign="top">
                <td>1</td>
                <td>0</td>
                <td>621</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table4fn1">
              <p><sup>a</sup>EPPGI: Extraction Protocol of Pregnancy and Gestation Information.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <table-wrap position="float" id="table5">
          <label>Table 5</label>
          <caption>
            <p>Confusion matrix of the method using only diagnosis and marital history.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="320"/>
            <col width="450"/>
            <col width="230"/>
            <thead>
              <tr valign="top">
                <td>Prediction</td>
                <td colspan="2">Reference</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>0</td>
                <td>1</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>0</td>
                <td>377</td>
                <td>166</td>
              </tr>
              <tr valign="top">
                <td>1</td>
                <td>0</td>
                <td>457</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <p>We also conducted analyses by time and region, as shown in <xref ref-type="table" rid="table6">Tables 6</xref> and <xref ref-type="table" rid="table7">7</xref>. In these tables, “quality inspection” refers to patients assessed through manual review by 2 experts for quality control to determine the presence of labor process information (from different hospital sources); “EPPGI” refers to patients assessed using EPPGI for maternity information; and “diagnosis history and marital history” refers to patients assessed using diagnosis and marital history for fertility information. In these tables, 0 represents “no maternity information” and 1 represents “there is maternity information.” The results indicated that, similar to the overall comparison, the identification of maternal information using the EPPGI method was superior to using diagnosis and marital history alone. By examining the results across different hospitals and time periods, our method proved to be universally applicable across various years and regions.</p>
        <table-wrap position="float" id="table6">
          <label>Table 6</label>
          <caption>
            <p>FRA<sup>a</sup> identification results of different privacy methods in different hospitals (1 being “there is maternity information” and 0 being “no maternity information”).</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="310"/>
            <col width="110"/>
            <col width="120"/>
            <col width="110"/>
            <col width="120"/>
            <col width="110"/>
            <col width="120"/>
            <thead>
              <tr valign="top">
                <td>Hospital number</td>
                <td colspan="2">Quality inspection</td>
                <td colspan="2">EPPGI<sup>b</sup></td>
                <td colspan="2">Diagnosis history and marital history</td>
              </tr>
              <tr valign="top">
                <td/>
                <td>0</td>
                <td>1</td>
                <td>0</td>
                <td>1</td>
                <td>0</td>
                <td>1</td>
              </tr>
              <tr valign="top">
                <td/>
                <td/>
                <td/>
                <td/>
                <td/>
                <td/>
                <td/>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>1</td>
                <td>40</td>
                <td>49</td>
                <td>40</td>
                <td>49</td>
                <td>57</td>
                <td>32</td>
              </tr>
              <tr valign="top">
                <td>10</td>
                <td>57</td>
                <td>18</td>
                <td>57</td>
                <td>18</td>
                <td>62</td>
                <td>13</td>
              </tr>
              <tr valign="top">
                <td>11</td>
                <td>40</td>
                <td>54</td>
                <td>40</td>
                <td>54</td>
                <td>45</td>
                <td>49</td>
              </tr>
              <tr valign="top">
                <td>12</td>
                <td>3</td>
                <td>4</td>
                <td>3</td>
                <td>4</td>
                <td>3</td>
                <td>4</td>
              </tr>
              <tr valign="top">
                <td>13</td>
                <td>15</td>
                <td>64</td>
                <td>16</td>
                <td>63</td>
                <td>42</td>
                <td>37</td>
              </tr>
              <tr valign="top">
                <td>14</td>
                <td>29</td>
                <td>16</td>
                <td>29</td>
                <td>16</td>
                <td>29</td>
                <td>16</td>
              </tr>
              <tr valign="top">
                <td>15</td>
                <td>0</td>
                <td>17</td>
                <td>0</td>
                <td>17</td>
                <td>3</td>
                <td>14</td>
              </tr>
              <tr valign="top">
                <td>16</td>
                <td>2</td>
                <td>1</td>
                <td>2</td>
                <td>1</td>
                <td>3</td>
                <td>0</td>
              </tr>
              <tr valign="top">
                <td>17</td>
                <td>48</td>
                <td>55</td>
                <td>48</td>
                <td>55</td>
                <td>71</td>
                <td>32</td>
              </tr>
              <tr valign="top">
                <td>18</td>
                <td>17</td>
                <td>7</td>
                <td>17</td>
                <td>7</td>
                <td>17</td>
                <td>7</td>
              </tr>
              <tr valign="top">
                <td>19</td>
                <td>6</td>
                <td>31</td>
                <td>6</td>
                <td>31</td>
                <td>9</td>
                <td>28</td>
              </tr>
              <tr valign="top">
                <td>2</td>
                <td>4</td>
                <td>28</td>
                <td>4</td>
                <td>28</td>
                <td>10</td>
                <td>22</td>
              </tr>
              <tr valign="top">
                <td>3</td>
                <td>11</td>
                <td>62</td>
                <td>11</td>
                <td>62</td>
                <td>13</td>
                <td>60</td>
              </tr>
              <tr valign="top">
                <td>4</td>
                <td>40</td>
                <td>33</td>
                <td>41</td>
                <td>32</td>
                <td>55</td>
                <td>18</td>
              </tr>
              <tr valign="top">
                <td>5</td>
                <td>15</td>
                <td>57</td>
                <td>15</td>
                <td>57</td>
                <td>33</td>
                <td>39</td>
              </tr>
              <tr valign="top">
                <td>6</td>
                <td>29</td>
                <td>53</td>
                <td>29</td>
                <td>53</td>
                <td>56</td>
                <td>26</td>
              </tr>
              <tr valign="top">
                <td>7</td>
                <td>8</td>
                <td>45</td>
                <td>8</td>
                <td>45</td>
                <td>13</td>
                <td>40</td>
              </tr>
              <tr valign="top">
                <td>8</td>
                <td>13</td>
                <td>29</td>
                <td>13</td>
                <td>29</td>
                <td>22</td>
                <td>20</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table6fn1">
              <p><sup>a</sup>FRA: females with reproductive activities.</p>
            </fn>
            <fn id="table6fn2">
              <p><sup>b</sup>EPPGI: Extraction Protocol of Pregnancy and Gestation Information.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <table-wrap position="float" id="table7">
          <label>Table 7</label>
          <caption>
            <p>FRA<sup>a</sup> recognition results for different privacy methods in different years (1 being “there is maternity information” and 0 being “no maternity information”).</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="310"/>
            <col width="110"/>
            <col width="120"/>
            <col width="110"/>
            <col width="120"/>
            <col width="110"/>
            <col width="120"/>
            <thead>
              <tr valign="top">
                <td>Year</td>
                <td colspan="2">Quality inspection</td>
                <td colspan="2">EPPGI<sup>b</sup></td>
                <td colspan="2">Diagnosis history and marital history</td>
              </tr>
              <tr valign="top">
                <td/>
                <td>0</td>
                <td>1</td>
                <td>0</td>
                <td>1</td>
                <td>0</td>
                <td>1</td>
              </tr>
              <tr valign="top">
                <td/>
                <td/>
                <td/>
                <td/>
                <td/>
                <td/>
                <td/>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>2010</td>
                <td>14</td>
                <td>9</td>
                <td>14</td>
                <td>9</td>
                <td>19</td>
                <td>4</td>
              </tr>
              <tr valign="top">
                <td>2011</td>
                <td>18</td>
                <td>10</td>
                <td>18</td>
                <td>10</td>
                <td>23</td>
                <td>5</td>
              </tr>
              <tr valign="top">
                <td>2012</td>
                <td>19</td>
                <td>18</td>
                <td>19</td>
                <td>18</td>
                <td>29</td>
                <td>8</td>
              </tr>
              <tr valign="top">
                <td>2013</td>
                <td>42</td>
                <td>40</td>
                <td>42</td>
                <td>40</td>
                <td>55</td>
                <td>27</td>
              </tr>
              <tr valign="top">
                <td>2014</td>
                <td>46</td>
                <td>59</td>
                <td>46</td>
                <td>59</td>
                <td>60</td>
                <td>45</td>
              </tr>
              <tr valign="top">
                <td>2015</td>
                <td>33</td>
                <td>75</td>
                <td>34</td>
                <td>74</td>
                <td>55</td>
                <td>53</td>
              </tr>
              <tr valign="top">
                <td>2016</td>
                <td>42</td>
                <td>93</td>
                <td>42</td>
                <td>93</td>
                <td>69</td>
                <td>66</td>
              </tr>
              <tr valign="top">
                <td>2017</td>
                <td>63</td>
                <td>130</td>
                <td>64</td>
                <td>129</td>
                <td>95</td>
                <td>98</td>
              </tr>
              <tr valign="top">
                <td>2018</td>
                <td>60</td>
                <td>107</td>
                <td>60</td>
                <td>107</td>
                <td>86</td>
                <td>81</td>
              </tr>
              <tr valign="top">
                <td>2019</td>
                <td>27</td>
                <td>58</td>
                <td>27</td>
                <td>58</td>
                <td>36</td>
                <td>49</td>
              </tr>
              <tr valign="top">
                <td>2020</td>
                <td>13</td>
                <td>24</td>
                <td>13</td>
                <td>24</td>
                <td>16</td>
                <td>21</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table7fn1">
              <p><sup>a</sup>FRA: females with reproductive activities.</p>
            </fn>
            <fn id="table7fn2">
              <p><sup>b</sup>EPPGI: Extraction Protocol of Pregnancy and Gestation Information.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Privacy Protection Strategies</title>
        <p>The privacy-protection strategies were developed based on the above results. Given that we used regular expressions to identify SRI, additional text surrounding the recognized maternity information needs to be concealed to prevent privacy exposure through context. We randomly selected 1000 EMRs of pregnant patients for static data desensitization to create a masked sample of EMRs. A total of 2 independent reviewers (Aixin Guo and Wenna Liu) were assigned to manually extract any form of pregnancy and gestation information from the masked samples. Furthermore, the risk of unnecessary privacy exposure was stratified by the frequency of recognition. The text of medical records, having the highest recognition frequency, should be handled with the utmost caution. In contrast, test and examination records are less frequently identified with SRI. It is important to note that the frequency of recognition does not fully represent the risk of privacy leakage, which will be further analyzed in the discussion section.</p>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <sec>
          <title>Overview</title>
          <p>This study is one of the first large-scale investigations into privacy leakage and FRA identification of Chinese EMRs, focusing on the frequency of recognition. The originality of this work can be summarized in 3 key aspects.</p>
        </sec>
        <sec>
          <title>Originality in Exploring New Observations</title>
          <p>The accessibility of EMR inevitably leads to uneven privacy protection awareness among different EMR users. The importance of reliable privacy protection methods has been extensively discussed in the literature, emphasizing their critical role in the successful implementation of EMRs in health care institutions [<xref ref-type="bibr" rid="ref18">18</xref>,<xref ref-type="bibr" rid="ref19">19</xref>]. Sensitive information regarding pregnancy, gestation, and abortion is routinely included in EMRs, raising concerns about unnecessary exposure [<xref ref-type="bibr" rid="ref20">20</xref>,<xref ref-type="bibr" rid="ref21">21</xref>]. In 2021, the Personal Information Protection Law of the People’s Republic of China came into effect, which clarified the rights and responsibilities related to the use of personal privacy information [<xref ref-type="bibr" rid="ref22">22</xref>]. However, prior to this study, there had been little to no effort to address the highest standards of patient privacy protection protocols during RWE studies. To the best of our knowledge, this is the first study in China to use a national-level EMR database to quantitatively evaluate the exposure risk of privacy information related to women’s reproductive health. This study aims to enhance protection strategies in this area.</p>
        </sec>
        <sec>
          <title>Originality in Designing New Experiments</title>
          <p>The attributes and structure of Chinese EMRs are unique in terms of terminology and data standards. Accurate and comprehensive recognition of maternity information is widely reported to play a critical role in effective privacy protection and the evaluation of RWE [<xref ref-type="bibr" rid="ref23">23</xref>,<xref ref-type="bibr" rid="ref24">24</xref>]. While researchers have been working to improve the accuracy of SRI identification in non-Chinese EMRs [<xref ref-type="bibr" rid="ref25">25</xref>,<xref ref-type="bibr" rid="ref26">26</xref>], to the best of our knowledge, no prior research has focused on the accurate and complete extraction of FRA from Chinese EMRs. Traditional diagnosis-based patient extraction protocols typically use diagnosis codes, such as the International Classification of Diseases, which have 2 major limitations.</p>
          <p>First, the records in the diagnosis sheet are often incomplete. Due to inconsistencies in Chinese EMR documentation, physicians do not always record pregnancy and gestation information as a diagnosis, especially when the patient’s primary complaint is unrelated to maternity. This leads to lower recall rates and potential recall bias. Second, due to the complexity and inconsistency of coding systems in Chinese EMRs, using codes for patient identification is more complicated than using regex, and it is nearly impossible to list all encodings exhaustively. Furthermore, regex can be widely adopted across different sub–data sets of Chinese EMRs. Although the diagnostic sheet contains the major SRI, most Chinese EMRs are still stored in text format without code mapping.</p>
          <p>Compared to traditional diagnosis-based patient inclusion methods, the EPPGI method provides more precise results in a practical manner. Whether applied to patient- or visit-based records, EPPGI extracts significantly more FRA with a high precision rate.</p>
        </sec>
        <sec>
          <title>Originality in Contributing New Knowledge</title>
          <p>Our results demonstrate that traditional fixed-site data masking procedures lead to considerable unnecessary exposure of privacy information. For instance, patients’ HCG test results or delivery procedures are commonly recorded in sub–data sets that cannot simply be concealed during RWE studies and clinical use. The combination of pregnancy and gestation information can even infer the incidence of abortion, which is highly confidential in China. Accurate and complete recognition of maternity information is essential for flawless privacy protection.</p>
          <p>The EPPGI method first identified pregnancy and gestation information across entire Chinese EMRs. For the identified information, it is practical and convenient to use data desensitization techniques, including data invalidation, data offset, and symmetric encryption, to prevent the misuse of private data. Based on the EMRs in CRDS, we determined the optimal length of additional concealed text to retain most medical information. Additionally, the quantified recognition frequency of pregnancy and gestation information helps researchers use EMRs wisely to avoid unnecessary privacy leakage. Although the frequency of identification cannot fully determine the risk of privacy leakage, which is also associated with the complexities of data desensitization, these results highlight the richness of private information ingrained in EMRs.</p>
          <p>From a data asset management perspective, quantifying the risk of privacy leakage is critical under the strict Personal Information Protection Law. Based on statistical results and actual data mining practices, SRI is widely stored in Chinese EMRs, requiring data desensitization when using any EMR sub–data sets. For test results and structured data, the difficulty of data desensitization is relatively lower than that for plain text medical records, given the explicit nature of sensitive data entities and the low probability of reinference from context. Similar to the hazard classification of chemicals, health care data users should be aware of the richness of private information and the risk of unnecessary privacy exposure in EMRs. Maternity information is considered one of the most sensitive types of privacy for women, and our results provide a crucial reference for data users to assess related risks in Chinese EMRs for the first time.</p>
        </sec>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>Overall, this work provides justification for assessing privacy leakage risk and offers a reference for effective privacy protection in Chinese EMRs. However, the proposed study has several limitations. First, the frequency of sensitive information and the privacy risk estimated in our case study are primarily based on the EMRs of a renal disease database. While there are official directions and guidelines for composing EMRs in China [<xref ref-type="bibr" rid="ref9">9</xref>], discrepancies exist between the CRDS and other data networks in terms of data structure and operating environment. Specific protocols and variables should be optimized for generalization.</p>
        <p>Furthermore, the study is limited by its data scale, covering only 688,268 FRA in the CRDS. This limited scope suggests the need for further research involving larger data sets to validate and refine our findings.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>Finding an effective and practical way to protect private information in EMRs is both meaningful and useful. We have demonstrated the feasibility of applying the EPPGI method to EMRs from 19 hospitals in different regions. We believe that EPPGI can provide a valuable reference for patient inclusion in any maternity-related studies using Chinese EMRs. Our protocols, designed for Chinese EMR systems, enable the accurate and complete recognition and extraction of pregnancy and gestation data, ensuring its effective protection. Compared to traditional methods of FRA inclusion, the EPPGI method provides more comprehensive results.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>The detailed rules and regex of EPPGI (Extraction Protocol of Pregnancy and Gestation Information).</p>
        <media xlink:href="jmir_v26i1e46455_app1.docx" xlink:title="DOCX File, 17 KB"/>
      </supplementary-material>
      <supplementary-material id="app2">
        <label>Multimedia Appendix 2</label>
        <p>Source code.</p>
        <media xlink:href="jmir_v26i1e46455_app2.zip" xlink:title="ZIP File (Zip Archive), 12 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">CRDS</term>
          <def>
            <p>Chinese Renal Disease Data System</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">EMR</term>
          <def>
            <p>electronic medical record</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">EPPGI</term>
          <def>
            <p>Extraction Protocol of Pregnancy and Gestation Information</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">FRA</term>
          <def>
            <p>females with reproductive activities</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">HCG</term>
          <def>
            <p>human chorionic gonadotropin</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">RWE</term>
          <def>
            <p>real-world evidence</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">SRI</term>
          <def>
            <p>sensitive reproductive information</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">STROBE</term>
          <def>
            <p>Strengthening the Reporting of Observational Studies in Epidemiology</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>This work was supported by the National Key Research and Development Program of China (2021YFC2500200 and 2023YFC2706305). This work was also supported by the Multi-modality Data Integration and Application Lab of Guangdong Medical University and the National Clinical Research Center for Geriatric Disorders (Huashan). We did not use generative artificial intelligence in any portion of the manuscript writing.</p>
    </ack>
    <notes>
      <sec>
        <title>Data Availability</title>
        <p>The data sets analyzed during this study are not publicly available due to the sensitivity of hospitals’ data but are available from the corresponding author upon reasonable request. The source code is available in <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>.</p>
      </sec>
    </notes>
    <fn-group>
      <fn fn-type="con">
        <p>MG, CL, and SN conceived and designed the study. LS collected the data. YJ and WL drafted the initial manuscript. HZ and YY integrated and revised the manuscript. MG and SN served as co–corresponding authors. CL, YJ, and LS served as co–first authors.</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hung</surname>
              <given-names>PCK</given-names>
            </name>
          </person-group>
          <article-title>Towards a privacy access control model for e-Healthcare services</article-title>
          <source>DBLP</source>
          <year>2005</year>
          <conf-name>Conference on Privacy, Security and Trust</conf-name>
          <conf-date>October 12, 2005</conf-date>
          <conf-loc>Canada</conf-loc>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.semanticscholar.org/paper/Towards-a-Privacy-Access-Control-Model-for-Services-Hung/e219290f8dc650863831718e97dd0e5e6b2064a2"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Johari</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Jadhav</surname>
              <given-names>U</given-names>
            </name>
          </person-group>
          <article-title>Abortion rights judgment: a ray of hope!</article-title>
          <source>Indian J Med Ethics</source>
          <year>2017</year>
          <volume>2</volume>
          <issue>3</issue>
          <fpage>180</fpage>
          <lpage>183</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.20529/IJME.2017.044"/>
          </comment>
          <pub-id pub-id-type="doi">10.20529/IJME.2017.044</pub-id>
          <pub-id pub-id-type="medline">28279947</pub-id>
          <pub-id pub-id-type="pii">5149</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Uterhark</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>International law and the legalization of abortion in Northern Ireland</article-title>
          <source>J Law Health</source>
          <year>2020</year>
          <volume>34</volume>
          <issue>1</issue>
          <fpage>155</fpage>
          <lpage>189</lpage>
          <pub-id pub-id-type="medline">33449459</pub-id>
          <pub-id pub-id-type="pii">j34/1/155</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Påfs</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Rulisa</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Klingberg-Allvin</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Binder-Finnema</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Musafili</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Essén</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Implementing the liberalized abortion law in Kigali, Rwanda: ambiguities of rights and responsibilities among health care providers</article-title>
          <source>Midwifery</source>
          <year>2020</year>
          <volume>80</volume>
          <fpage>102568</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S0266-6138(19)30259-1"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.midw.2019.102568</pub-id>
          <pub-id pub-id-type="medline">31698295</pub-id>
          <pub-id pub-id-type="pii">S0266-6138(19)30259-1</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rigdon</surname>
              <given-names>SM</given-names>
            </name>
          </person-group>
          <article-title>Abortion law and practice in China: an overview with comparisons to the United States</article-title>
          <source>Soc Sci Med</source>
          <year>1996</year>
          <volume>42</volume>
          <issue>4</issue>
          <fpage>543</fpage>
          <lpage>560</lpage>
          <pub-id pub-id-type="doi">10.1016/0277-9536(95)00173-5</pub-id>
          <pub-id pub-id-type="medline">8643980</pub-id>
          <pub-id pub-id-type="pii">0277953695001735</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Shen</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Zheng</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Tang</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Lei</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Adoption of electronic health records (EHRs) in China during the past 10 years: consecutive survey data analysis and comparison of Sino-American challenges and experiences</article-title>
          <source>J Med Internet Res</source>
          <year>2021</year>
          <volume>23</volume>
          <issue>2</issue>
          <fpage>e24813</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2021/2/e24813/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/24813</pub-id>
          <pub-id pub-id-type="medline">33599615</pub-id>
          <pub-id pub-id-type="pii">v23i2e24813</pub-id>
          <pub-id pub-id-type="pmcid">PMC7932845</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hager</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Lindblad</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Brommels</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Salomonsson</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Wannheden</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Sharing patient-controlled real-world data through the application of the theory of commons: action research case study</article-title>
          <source>J Med Internet Res</source>
          <year>2021</year>
          <volume>23</volume>
          <issue>1</issue>
          <fpage>e16842</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2021/1/e16842/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/16842</pub-id>
          <pub-id pub-id-type="medline">33464212</pub-id>
          <pub-id pub-id-type="pii">v23i1e16842</pub-id>
          <pub-id pub-id-type="pmcid">PMC7854041</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Festag</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Spreckelsen</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Privacy-preserving deep learning for the detection of protected health information in real-world data: comparative evaluation</article-title>
          <source>JMIR Form Res</source>
          <year>2020</year>
          <volume>4</volume>
          <issue>5</issue>
          <fpage>e14064</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://formative.jmir.org/2020/5/e14064/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/14064</pub-id>
          <pub-id pub-id-type="medline">32369025</pub-id>
          <pub-id pub-id-type="pii">v4i5e14064</pub-id>
          <pub-id pub-id-type="pmcid">PMC7238077</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="web">
          <source>Typical cases of public interest litigation for the protection of women's rights</source>
          <access-date>2024-07-30</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.spp.gov.cn/xwfbh/dxal/202211/t20221125_593721.shtml">https://www.spp.gov.cn/xwfbh/dxal/202211/t20221125_593721.shtml</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="web">
          <article-title>Regulations of Guangdong Province on the management of maternal and child health care</article-title>
          <source>People's Government of Guangdong Province</source>
          <access-date>2024-07-17</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.gd.gov.cn/zwgk/wjk/zcfgk/content/post_2524333.html">https://www.gd.gov.cn/zwgk/wjk/zcfgk/content/post_2524333.html</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="web">
          <article-title>Technical specification of hospital information platform based on electronic medical record</article-title>
          <source>National Health Commission of the People's Republic of China</source>
          <year>2014</year>
          <access-date>2024-07-17</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.nhc.gov.cn/wjw/s9497/201406/a2014514701f4e76b14f3446f6318937.shtml">http://www.nhc.gov.cn/wjw/s9497/201406/a2014514701f4e76b14f3446f6318937.shtml</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Z</given-names>
            </name>
          </person-group>
          <article-title>Data integration of electronic medical record under administrative decentralization of medical insurance and healthcare in China: a case study</article-title>
          <source>Isr J Health Policy Res</source>
          <year>2019</year>
          <volume>8</volume>
          <issue>1</issue>
          <fpage>24</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://ijhpr.biomedcentral.com/articles/10.1186/s13584-019-0293-9"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s13584-019-0293-9</pub-id>
          <pub-id pub-id-type="medline">30929644</pub-id>
          <pub-id pub-id-type="pii">10.1186/s13584-019-0293-9</pub-id>
          <pub-id pub-id-type="pmcid">PMC6442402</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cervinski</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Lockwood</surname>
              <given-names>CM</given-names>
            </name>
            <name name-style="western">
              <surname>Ferguson</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Odem</surname>
              <given-names>RR</given-names>
            </name>
            <name name-style="western">
              <surname>Stenman</surname>
              <given-names>UH</given-names>
            </name>
            <name name-style="western">
              <surname>Alfthan</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Grenache</surname>
              <given-names>DG</given-names>
            </name>
            <name name-style="western">
              <surname>Gronowski</surname>
              <given-names>AM</given-names>
            </name>
          </person-group>
          <article-title>Qualitative point-of-care and over-the-counter urine hCG devices differentially detect the hCG variants of early pregnancy</article-title>
          <source>Clin Chim Acta</source>
          <year>2009</year>
          <volume>406</volume>
          <issue>1-2</issue>
          <fpage>81</fpage>
          <lpage>85</lpage>
          <pub-id pub-id-type="doi">10.1016/j.cca.2009.05.018</pub-id>
          <pub-id pub-id-type="medline">19477170</pub-id>
          <pub-id pub-id-type="pii">S0009-8981(09)00304-0</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Montagnana</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Trenti</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Aloe</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Cervellin</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Lippi</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Human chorionic gonadotropin in pregnancy diagnostics</article-title>
          <source>Clin Chim Acta</source>
          <year>2011</year>
          <volume>412</volume>
          <issue>17-18</issue>
          <fpage>1515</fpage>
          <lpage>1520</lpage>
          <pub-id pub-id-type="doi">10.1016/j.cca.2011.05.025</pub-id>
          <pub-id pub-id-type="medline">21635878</pub-id>
          <pub-id pub-id-type="pii">S0009-8981(11)00300-7</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Farland</surname>
              <given-names>LV</given-names>
            </name>
            <name name-style="western">
              <surname>Prescott</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Sasamoto</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Tobias</surname>
              <given-names>DK</given-names>
            </name>
            <name name-style="western">
              <surname>Gaskins</surname>
              <given-names>AJ</given-names>
            </name>
            <name name-style="western">
              <surname>Stuart</surname>
              <given-names>JJ</given-names>
            </name>
            <name name-style="western">
              <surname>Carusi</surname>
              <given-names>DA</given-names>
            </name>
            <name name-style="western">
              <surname>Chavarro</surname>
              <given-names>JE</given-names>
            </name>
            <name name-style="western">
              <surname>Horne</surname>
              <given-names>AW</given-names>
            </name>
            <name name-style="western">
              <surname>Rich-Edwards</surname>
              <given-names>JW</given-names>
            </name>
            <name name-style="western">
              <surname>Missmer</surname>
              <given-names>SA</given-names>
            </name>
          </person-group>
          <article-title>Endometriosis and risk of adverse pregnancy outcomes</article-title>
          <source>Obstet Gynecol</source>
          <year>2019</year>
          <volume>134</volume>
          <issue>3</issue>
          <fpage>527</fpage>
          <lpage>536</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/31403584"/>
          </comment>
          <pub-id pub-id-type="doi">10.1097/AOG.0000000000003410</pub-id>
          <pub-id pub-id-type="medline">31403584</pub-id>
          <pub-id pub-id-type="pii">00006250-201909000-00014</pub-id>
          <pub-id pub-id-type="pmcid">PMC6922084</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Attali</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Yogev</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>The impact of advanced maternal age on pregnancy outcome</article-title>
          <source>Best Pract Res Clin Obstet Gynaecol</source>
          <year>2021</year>
          <volume>70</volume>
          <fpage>2</fpage>
          <lpage>9</lpage>
          <pub-id pub-id-type="doi">10.1016/j.bpobgyn.2020.06.006</pub-id>
          <pub-id pub-id-type="medline">32773291</pub-id>
          <pub-id pub-id-type="pii">S1521-6934(20)30096-1</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="web">
          <source>China Renal Data System</source>
          <access-date>2024-07-17</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.crds-network.org.cn/#/database">http://www.crds-network.org.cn/#/database</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kuo</surname>
              <given-names>KM</given-names>
            </name>
            <name name-style="western">
              <surname>Ma</surname>
              <given-names>CC</given-names>
            </name>
            <name name-style="western">
              <surname>Alexander</surname>
              <given-names>JW</given-names>
            </name>
          </person-group>
          <article-title>How do patients respond to violation of their information privacy?</article-title>
          <source>Health Inf Manag</source>
          <year>2014</year>
          <volume>43</volume>
          <issue>2</issue>
          <fpage>23</fpage>
          <lpage>33</lpage>
          <pub-id pub-id-type="doi">10.1177/183335831404300204</pub-id>
          <pub-id pub-id-type="medline">24948663</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Moore</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Frye</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Review of HIPAA, part 1: history, protected health information, and privacy and security rules</article-title>
          <source>J Nucl Med Technol</source>
          <year>2019</year>
          <volume>47</volume>
          <issue>4</issue>
          <fpage>269</fpage>
          <lpage>272</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://tech.snmjournals.org/cgi/pmidlookup?view=long&#38;pmid=31182664"/>
          </comment>
          <pub-id pub-id-type="doi">10.2967/jnmt.119.227819</pub-id>
          <pub-id pub-id-type="medline">31182664</pub-id>
          <pub-id pub-id-type="pii">jnmt.119.227819</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hackett</surname>
              <given-names>KM</given-names>
            </name>
            <name name-style="western">
              <surname>Kazemi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Sellen</surname>
              <given-names>DW</given-names>
            </name>
          </person-group>
          <article-title>Keeping secrets in the cloud: mobile phones, data security and privacy within the context of pregnancy and childbirth in Tanzania</article-title>
          <source>Soc Sci Med</source>
          <year>2018</year>
          <volume>211</volume>
          <fpage>190</fpage>
          <lpage>197</lpage>
          <pub-id pub-id-type="doi">10.1016/j.socscimed.2018.06.014</pub-id>
          <pub-id pub-id-type="medline">29960170</pub-id>
          <pub-id pub-id-type="pii">S0277-9536(18)30322-8</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Patient privacy and autonomy: a comparative analysis of cases of ethical dilemmas in China and the United States</article-title>
          <source>BMC Med Ethics</source>
          <year>2021</year>
          <volume>22</volume>
          <issue>1</issue>
          <fpage>8</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedethics.biomedcentral.com/articles/10.1186/s12910-021-00579-6"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12910-021-00579-6</pub-id>
          <pub-id pub-id-type="medline">33531011</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12910-021-00579-6</pub-id>
          <pub-id pub-id-type="pmcid">PMC7856764</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <collab>NA</collab>
          </person-group>
          <article-title>The National People's Congress of the People's Republic of China</article-title>
          <year>2021</year>
          <conf-name>Data Security Law of the People's Republic of China</conf-name>
          <conf-date>July 17, 2024</conf-date>
          <conf-loc>Beijing</conf-loc>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.npc.gov.cn/npc/c2/c30834/202108/t20210820_313088.html"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gokhale</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Stürmer</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Buse</surname>
              <given-names>JB</given-names>
            </name>
          </person-group>
          <article-title>Real-world evidence: the devil is in the detail</article-title>
          <source>Diabetologia</source>
          <year>2020</year>
          <volume>63</volume>
          <issue>9</issue>
          <fpage>1694</fpage>
          <lpage>1705</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/32666226"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s00125-020-05217-1</pub-id>
          <pub-id pub-id-type="medline">32666226</pub-id>
          <pub-id pub-id-type="pii">10.1007/s00125-020-05217-1</pub-id>
          <pub-id pub-id-type="pmcid">PMC7448554</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Qi</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>X</given-names>
            </name>
          </person-group>
          <article-title>Toward a better understanding about real-world evidence</article-title>
          <source>Eur J Hosp Pharm</source>
          <year>2022</year>
          <volume>29</volume>
          <issue>1</issue>
          <fpage>8</fpage>
          <lpage>11</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://ejhp.bmj.com/lookup/pmidlookup?view=long&#38;pmid=34857642"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/ejhpharm-2021-003081</pub-id>
          <pub-id pub-id-type="medline">34857642</pub-id>
          <pub-id pub-id-type="pii">ejhpharm-2021-003081</pub-id>
          <pub-id pub-id-type="pmcid">PMC8717805</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Angras</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Boyd</surname>
              <given-names>VE</given-names>
            </name>
            <name name-style="western">
              <surname>Gray</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Young</surname>
              <given-names>AJ</given-names>
            </name>
            <name name-style="western">
              <surname>Paglia</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Mackeen</surname>
              <given-names>AD</given-names>
            </name>
          </person-group>
          <article-title>Retrospective application of algorithms to improve identification of pregnancy outcomes from the electronic health record</article-title>
          <source>J Perinatol</source>
          <year>2023</year>
          <volume>43</volume>
          <issue>1</issue>
          <fpage>10</fpage>
          <lpage>14</lpage>
          <pub-id pub-id-type="doi">10.1038/s41372-022-01496-1</pub-id>
          <pub-id pub-id-type="medline">36050515</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41372-022-01496-1</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Moll</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Wong</surname>
              <given-names>HL</given-names>
            </name>
            <name name-style="western">
              <surname>Fingar</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Hobbi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Sheng</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Burrell</surname>
              <given-names>TA</given-names>
            </name>
            <name name-style="western">
              <surname>Eckert</surname>
              <given-names>LO</given-names>
            </name>
            <name name-style="western">
              <surname>Munoz</surname>
              <given-names>FM</given-names>
            </name>
            <name name-style="western">
              <surname>Baer</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Shoaibi</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Anderson</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Validating claims-based algorithms determining pregnancy outcomes and gestational age using a linked claims-electronic medical record database</article-title>
          <source>Drug Saf</source>
          <year>2021</year>
          <volume>44</volume>
          <issue>11</issue>
          <fpage>1151</fpage>
          <lpage>1164</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/34591264"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s40264-021-01113-8</pub-id>
          <pub-id pub-id-type="medline">34591264</pub-id>
          <pub-id pub-id-type="pii">10.1007/s40264-021-01113-8</pub-id>
          <pub-id pub-id-type="pmcid">PMC8481319</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
