<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">ResProt</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Res Protoc</journal-id>
      <journal-title>JMIR Research Protocols</journal-title>
      <issn pub-type="epub">1929-0748</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v12i1e46471</article-id>
      <article-id pub-id-type="pmid">37566443</article-id>
      <article-id pub-id-type="doi">10.2196/46471</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Protocol</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Protocol</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Data Quality– and Utility-Compliant Anonymization of Common Data Model–Harmonized Electronic Health Record Data: Protocol for a Scoping Review</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Mavragani</surname>
            <given-names>Amaryllis</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Ferrari</surname>
            <given-names>Davide</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Johnson</surname>
            <given-names>Owen</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Mungoli</surname>
            <given-names>Neelesh</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Kamdje Wabo</surname>
            <given-names>Gaetan</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Department of Biomedical Informatics</institution>
            <institution>Center for Preventive Medicine and Digital Health Baden-Württemberg</institution>
            <institution>Mannheim Medical Faculty of the University of Heidelberg</institution>
            <addr-line>Theodor-Kutzer-Ufer 1-3, House 3, Floor 4</addr-line>
            <addr-line>Mannheim, 68167</addr-line>
            <country>Germany</country>
            <phone>49 621 383 8088</phone>
            <email>gaetankamdje.wabo@medma.uni-heidelberg.de</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-1053-6162</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Prasser</surname>
            <given-names>Fabian</given-names>
          </name>
          <degrees>Dr rer nat</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-3172-3095</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Gierend</surname>
            <given-names>Kerstin</given-names>
          </name>
          <degrees>Dipl Inf</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-0417-3454</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Siegel</surname>
            <given-names>Fabian</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-9673-5030</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Ganslandt</surname>
            <given-names>Thomas</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-6864-8936</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Department of Biomedical Informatics</institution>
        <institution>Center for Preventive Medicine and Digital Health Baden-Württemberg</institution>
        <institution>Mannheim Medical Faculty of the University of Heidelberg</institution>
        <addr-line>Mannheim</addr-line>
        <country>Germany</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Berlin Institute of Health at Charité</institution>
        <institution>Universitätsmedizin Berlin</institution>
        <addr-line>Berlin</addr-line>
        <country>Germany</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Department of Urology and Urosurgery</institution>
        <institution>University Medical Center Mannheim</institution>
        <institution>Mannheim Medical Faculty of the University of Heidelberg</institution>
        <addr-line>Mannheim</addr-line>
        <country>Germany</country>
      </aff>
      <aff id="aff4">
        <label>4</label>
        <institution>Chair of Medical Informatics</institution>
        <institution>Friedrich-Alexander-Universität Erlangen-Nürnberg</institution>
        <addr-line>Erlangen</addr-line>
        <country>Germany</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Gaetan Kamdje Wabo <email>gaetankamdje.wabo@medma.uni-heidelberg.de</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <year>2023</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>11</day>
        <month>8</month>
        <year>2023</year>
      </pub-date>
      <volume>12</volume>
      <elocation-id>e46471</elocation-id>
      <history>
        <date date-type="received">
          <day>13</day>
          <month>2</month>
          <year>2023</year>
        </date>
        <date date-type="rev-request">
          <day>24</day>
          <month>4</month>
          <year>2023</year>
        </date>
        <date date-type="rev-recd">
          <day>31</day>
          <month>5</month>
          <year>2023</year>
        </date>
        <date date-type="accepted">
          <day>28</day>
          <month>6</month>
          <year>2023</year>
        </date>
      </history>
      <copyright-statement>©Gaetan Kamdje Wabo, Fabian Prasser, Kerstin Gierend, Fabian Siegel, Thomas Ganslandt. Originally published in JMIR Research Protocols (https://www.researchprotocols.org), 11.08.2023.</copyright-statement>
      <copyright-year>2023</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Research Protocols, is properly cited. The complete bibliographic information, a link to the original publication on https://www.researchprotocols.org, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://www.researchprotocols.org/2023/1/e46471" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>The anonymization of Common Data Model (CDM)–converted electronic health record (EHR) data is essential to ensure data privacy in the use of harmonized health care data. However, applying data anonymization techniques can significantly affect many properties of the resulting data sets and thus bias research results. Few studies have reviewed these applications with a reflection of approaches to manage data utility and quality concerns in the context of CDM-formatted health care data.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>Our intended scoping review aims to identify and describe (1) how formal anonymization methods are carried out with CDM-converted health care data, (2) how data quality and utility concerns are considered, and (3) how the various CDMs differ in terms of their suitability for recording anonymized data.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>The planned scoping review is based on the framework of Arksey and O’Malley. By using this, only articles published in English will be included. The retrieval of literature items should be based on a literature search string combining keywords related to data anonymization, CDM standards, and data quality assessment. The proposed literature search query should be validated by a librarian, accompanied by manual searches to include further informal sources. Eligible articles will first undergo a deduplication step, followed by the screening of titles. Second, a full-text reading will allow the 2 reviewers involved to reach the final decision about article selection, while a domain expert will support the resolution of citation selection conflicts. Additionally, key information will be extracted, categorized, summarized, and analyzed by using a proposed template in an iterative process. Tabular and graphical analyses should be addressed in alignment with the PRISMA-ScR (Preferred Reporting Items for Systematic Reviews and Meta-Analyses Extension for Scoping Reviews) checklist. We also performed some tentative searches on Web of Science for estimating the feasibility of reaching eligible articles.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>Tentative searches on Web of Science resulted in 507 nonduplicated matches, suggesting the availability of (potentially) relevant articles. Further analysis and selection steps will allow us to derive a final literature set. Furthermore, the completion of this scoping review study is expected by the end of the fourth quarter of 2023.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>Outlining the approaches of applying formal anonymization methods on CDM-formatted health care data while taking into account data quality and utility concerns should provide useful insights to understand the existing approaches and future research direction based on identified gaps. This protocol describes a schedule to perform a scoping review, which should support the conduction of follow-up investigations.</p>
        </sec>
        <sec sec-type="registered-report">
          <title>International Registered Report Identifier (IRRID)</title>
          <p>PRR1-10.2196/46471</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>EHR</kwd>
        <kwd>electronic health record</kwd>
        <kwd>data quality</kwd>
        <kwd>common data model</kwd>
        <kwd>data standard</kwd>
        <kwd>data privacy models</kwd>
        <kwd>data anonymization</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>The anonymization of health data is a key approach for preserving patient anonymity during the secondary use of relational (ie, tabular) electronic health record (EHR) data [<xref ref-type="bibr" rid="ref1">1</xref>]. However, to overcome the challenges related to the considerable heterogeneity in clinical data source systems (eg, due to diverse medical data coding frameworks, heterogeneous definitions of laboratory data values, or disparate setting- or task-dependent metadata), the use of common data models (CDMs) has been proposed and discussed [<xref ref-type="bibr" rid="ref2">2</xref>]. Converting structured or unstructured source data to CDM standards helps to reach an understanding of commonly harmonized data into collaborative network research [<xref ref-type="bibr" rid="ref3">3</xref>] and hence facilitates the cross-institutional exchange of medical data by using appropriate CDM metadata [<xref ref-type="bibr" rid="ref2">2</xref>]. By approaching this, anonymization of CDM-converted EHR data promises patient privacy–secured sharing and analysis of harmonized data, which requires specific data anonymization components.</p>
      <p>Extensive efforts describing the conduction [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref4">4</xref>-<xref ref-type="bibr" rid="ref13">13</xref>] of data anonymization exist, and it is essential to differentiate and properly address 3 major aspects when dealing with relational data anonymization (anonymization of tabular data). These include privacy models, data transformation models, and data utility models for assessing and ensuring the fitness of anonymous data for use. In terms of proposed privacy models, the k-anonymity privacy model [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref7">7</xref>] is one of the most widely used models. It consists of placing at least <italic>k</italic> patients in an equivalence class with the same patient-identifying data element values (so-called quasi-identifiers; eg, birthdate and zip code), so that the probability of reidentifying a patient becomes <italic>1/k</italic>. The value of the threshold <italic>k</italic> is determined by the data owner (eg, a hospital department sharing the data) depending on the size of the data and privacy protection level [<xref ref-type="bibr" rid="ref1">1</xref>]. Because of the limitations of this model for fully protecting sensitive information (eg, patient health insurance and treating medical doctor), the <italic>l</italic>-diversity privacy model [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref8">8</xref>] was proposed. This ensures that at least <italic>l</italic> “well-represented” values for sensitive data elements are present within each equivalence class.
Furthermore, additional data privacy models including the <italic>t</italic>-closeness privacy model [<xref ref-type="bibr" rid="ref9">9</xref>] (for preventing linkage of the record and data elements) and the differential privacy model [<xref ref-type="bibr" rid="ref10">10</xref>] (for preventing table linkage and probabilistic attacks) were also addressed. The strengths and limitations of these models were discussed extensively and in depth by Majeed and Lee [<xref ref-type="bibr" rid="ref1">1</xref>] and Lei et al [<xref ref-type="bibr" rid="ref11">11</xref>]. For implementing the data privacy models on data, a corresponding data transformation model is required, which may include a variety of technical operations. These comprise, for instance, generalization (by replacing some data values with parent values), suppression (implementing data record, value, or cell suppression), permutation (partitioning data records into dissociated groups), perturbation (partly or totally replacing original data with synthetic data), or anatomization (dissociating the relationships among patient-identifying data elements) [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref11">11</xref>,<xref ref-type="bibr" rid="ref13">13</xref>]. Implementing the privacy and data transformation models mentioned above has a high impact on the quality of anonymous data in terms of utility. Nonetheless, utility models including metrics such as accuracy or error rate, the <italic>F</italic>-measure, precision, and recall have been proposed to assess the utility of anonymous data for special purposes [<xref ref-type="bibr" rid="ref1">1</xref>].
Furthermore, the weighted certainty penalty, generalized information loss, the global loss penalty, relative error, or information theoretical metrics have also been recommended to estimate the utility of anonymous data for general purposes [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref13">13</xref>]. In addition, further evidence-based recommendations on how to assess and report on EHR data quality have been proposed [<xref ref-type="bibr" rid="ref14">14</xref>-<xref ref-type="bibr" rid="ref18">18</xref>] (eg, 3×3 data quality assessment guidelines [<xref ref-type="bibr" rid="ref16">16</xref>], the framework of Kahn et al [<xref ref-type="bibr" rid="ref15">15</xref>], or that of Fox et al [<xref ref-type="bibr" rid="ref18">18</xref>]), and tools for data anonymization, transformation, and utility models have been proposed and discussed [<xref ref-type="bibr" rid="ref4">4</xref>].</p>
      <p>Among others, by using CDM standards in the clinical context, related source data can be more efficiently reused, organized, described, validated, searched, and queried [<xref ref-type="bibr" rid="ref2">2</xref>]. International standards such as Fast Healthcare Interoperability Resources (FHIR) [<xref ref-type="bibr" rid="ref19">19</xref>] and CDM frameworks including the Informatics for Integrating Biology &amp; the Bedside (i2b2) TranSMART CDM [<xref ref-type="bibr" rid="ref20">20</xref>], the Observational Medical Outcomes Partnership’s Observational Health Data Sciences and Informatics (OMOP OHDSI) CDM [<xref ref-type="bibr" rid="ref21">21</xref>], the National Patient-Centered Clinical Research Network (PCORnet) CDM [<xref ref-type="bibr" rid="ref22">22</xref>], and the Clinical Data Interchange Standards Consortium’s (CDISC’s) Operational Data Model (ODM) [<xref ref-type="bibr" rid="ref23">23</xref>] have therefore gained widespread attention in the scientific community in the last decades. For instance, the Medical Informatics in Research and Care in University Medicine (MIRACUM) consortium of the German Medical Informatics Initiative [<xref ref-type="bibr" rid="ref24">24</xref>,<xref ref-type="bibr" rid="ref25">25</xref>] presents an illustrative deployment of some of these CDMs.</p>
      <p>While the interoperable conversion and querying of source EHR data into multiple CDM formats has been demonstrated [<xref ref-type="bibr" rid="ref26">26</xref>], it is nonetheless worth noting that an entire transformation of health care data from the original data format to CDMs, or from one CDM to another one, is barely practicable [<xref ref-type="bibr" rid="ref2">2</xref>]. This leads to potential challenges related to data completeness in the context of the use of CDM-converted health care data. Moreover, the relational anonymization of CDM-converted data by using the k-anonymity or <italic>l</italic>-diversity privacy models might provide an interesting lever to allow patient privacy–preserved sharing of harmonized health care data as shown by Almeida et al [<xref ref-type="bibr" rid="ref6">6</xref>] and in a recent study by Pitoglou et al [<xref ref-type="bibr" rid="ref27">27</xref>]. Nonetheless, the anonymization of health care data can disproportionately affect the quality of resulting anonymous data sets due to information loss, and hence their suitability for medical research, as investigated by Langarizadeh et al [<xref ref-type="bibr" rid="ref28">28</xref>] and Ferrão et al [<xref ref-type="bibr" rid="ref29">29</xref>]. Especially in the case of CDM-converted data, anonymization may affect both the cardinality and completeness requirements of the respective CDMs. This can be observed, for example, by the suppression of mandatory fields or by generalization through the entry of ranges (eg, age range) into fields that only allow numeric values (not intervals). Moreover, once CDM-converted data have been anonymized, it would be relevant to determine whether the generated anonymous data can be stored at all in conformity with the CDM structures, or if it would be necessary to adapt the CDM specifications (eg, through some slicing in FHIR specifying both the exact and range-based anonymous age).
This indicates the need for a thorough investigation of the suitability of CDM databases to record anonymized data in a quality-compliant format.</p>
      <p>This raises problems related to how anonymization-assisted preservation of patient privacy in using or sharing of CDM-harmonized health care data with a reflection of anonymous data utility is addressed, and whether CDMs differ in terms of their ability to record anonymized data. Despite the large range of studies performed in the fields of relational data anonymization [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref4">4</xref>-<xref ref-type="bibr" rid="ref13">13</xref>], CDM standards [<xref ref-type="bibr" rid="ref19">19</xref>-<xref ref-type="bibr" rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref30">30</xref>,<xref ref-type="bibr" rid="ref31">31</xref>], and frameworks for medical data quality assessments [<xref ref-type="bibr" rid="ref15">15</xref>-<xref ref-type="bibr" rid="ref17">17</xref>,<xref ref-type="bibr" rid="ref32">32</xref>], little attention has been paid to an extensive review of the existing literature addressing these questions. Reviewing the existing evidence concerning these issues might aid in identifying, describing, and understanding how relational data are anonymized, evaluated, and documented into specific CDM databases and to what extent the utility and quality of the obtained anonymous data are addressed. There could be some gaps in data utility research to be considered when anonymizing specific CDM-transformed clinical data for specific data mining scenarios such as predictive analysis or machine learning for improving health care quality. The evidence and identified gaps should serve as support for further investigations in the field of utility-compliant anonymizing of harmonized health care data.</p>
      <p>Given this research scope, we plan to conduct a scoping review that aims to identify and describe (1) the current status and challenges of implementing formal privacy models (eg, k-anonymization, <italic>l</italic>-diversity, differential privacy, or <italic>t</italic>-closeness) on CDM databases (including i2b2, OMOP, CDISC, PCORnet, and FHIR), (2) the strategies used there to ensure the quality and utility of anonymized data, and (3) the differences in multiple CDM standards in relation to their suitability to record and document anonymized data.</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Ethical Considerations</title>
        <p>No ethics approvals are required since the planned study is only concerned with the assessment of the literature within a specific domain. Hence, no sensitive patient-identifying data will be processed.</p>
      </sec>
      <sec>
        <title>Schedule</title>
        <p>For conducting this scoping review study, we will use the methodological framework of Arksey and O’Malley [<xref ref-type="bibr" rid="ref33">33</xref>], which recommends an analysis process based on 5 steps: step 1—identifying the research question, step 2—identifying the relevant studies, step 3—selecting studies, step 4—extracting and charting data, and step 5—collating, summarizing, and reporting the results. Below, we describe the methodology’s stepwise concepts and the planned and already implemented in-between steps.</p>
      </sec>
      <sec>
        <title>Step 1—Identification of the Research Questions</title>
        <p>As a prelude, an initial exploration of the literature was manually carried out to gain an overview of the issues regarding data quality and data anonymization as well as to determine the appropriate keywords to be included. A search was undertaken using a combination of the search terms “data quality,” “anonymi*ation,” and “deidentification,” and by querying the literature platforms PubMed and Web of Science Core Collection. The most relevant articles were selected and analyzed upon full-text reading. To form the final research questions, we additionally placed an explicit focus on the most internationally adopted CDMs (including i2b2 TranSMART, OMOP OHDSI, PCORnet, and CDISC ODM) and the FHIR standard. The research questions were derived by considering the research objectives stated above.</p>
        <p>In doing so, the planned scoping review investigation will address the following 3 research questions: how are formal anonymization methods carried out with CDM-converted health care data and which challenges are observed? How are data quality and utility concerns considered during the anonymization of CDM-converted health care data? How does anonymization affect the specifications of different CDM data models, and which differences are observable in the CDMs regarding their suitability for recording and documenting anonymized data?</p>
      </sec>
      <sec>
        <title>Step 2—Identifying the Relevant Studies</title>
        <sec>
          <title>Overview</title>
          <p>To identify the most relevant articles matching the research questions, we will explore a large set of articles by taking into account the literature databases to be used, language considerations, key concepts for retrieving the literature items, and construction of the search query. Additionally, here we show the designed query we tentatively implemented on Web of Science.</p>
        </sec>
        <sec>
          <title>Literature Databases</title>
          <p>The literature search should be performed by querying the literature engines PubMed and Web of Science Core Collection. These literature search engines cover an extended range of medical and health informatics–related studies, and the latter additionally includes the fields of biomedical sciences and engineering, which are of high relevance for retrieving data anonymization–related papers. Similar review projects considered the Web of Science Core Collection database as well [<xref ref-type="bibr" rid="ref34">34</xref>,<xref ref-type="bibr" rid="ref35">35</xref>].</p>
        </sec>
        <sec>
          <title>Article Language Considerations</title>
          <p>We will include articles published in English for facilitating the selection and screening of identified literature items.</p>
        </sec>
        <sec>
          <title>Key Concepts and Search Terms</title>
          <p>To efficiently find suitable articles, we have proposed 3 categories (concepts) of search terms, reflecting each of the relevant investigation domains of the study objective. The proposed set of search terms can be extended and documented, if necessary, during the literature extraction process.</p>
          <p>While the first category (A) relates to data anonymization methods, the second one (B) captures the field of medical CDMs and data standards, and the last one (C) covers the domain of data quality and utility assessment. <xref ref-type="table" rid="table1">Table 1</xref> provides an overview of the key concepts and the explicit search terms.</p>
          <table-wrap position="float" id="table1">
            <label>Table 1</label>
            <caption>
              <p>Key concepts.</p>
            </caption>
            <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
              <col width="200"/>
              <col width="400"/>
              <col width="400"/>
              <thead>
                <tr valign="top">
                  <td>Key concepts</td>
                  <td>Search terms</td>
                  <td>Investigation domains</td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td>A</td>
                  <td>
                    <list list-type="bullet">
                      <list-item>
                        <p>Deidentification/ De-identification</p>
                      </list-item>
                      <list-item>
                        <p>k-anonymity</p>
                      </list-item>
                      <list-item>
                        <p>t-closeness</p>
                      </list-item>
                      <list-item>
                        <p>l-diversity</p>
                      </list-item>
                      <list-item>
                        <p>Differential privacy</p>
                      </list-item>
                      <list-item>
                        <p>De-identified</p>
                      </list-item>
                      <list-item>
                        <p>Data masking</p>
                      </list-item>
                      <list-item>
                        <p>Data generalization</p>
                      </list-item>
                      <list-item>
                        <p>Data perturbation</p>
                      </list-item>
                      <list-item>
                        <p>Data permutation</p>
                      </list-item>
                      <list-item>
                        <p>Data suppression</p>
                      </list-item>
                      <list-item>
                        <p>Data anatomization</p>
                      </list-item>
                    </list>
                    <break/>
                  </td>
                  <td>Formal data anonymization</td>
                </tr>
                <tr valign="top">
                  <td>B</td>
                  <td>
                    <list list-type="bullet">
                      <list-item>
                        <p>i2b2</p>
                      </list-item>
                      <list-item>
                        <p>TranSMART</p>
                      </list-item>
                      <list-item>
                        <p>OMOP</p>
                      </list-item>
                      <list-item>
                        <p>OHDSI</p>
                      </list-item>
                      <list-item>
                        <p>CDISC ODM</p>
                      </list-item>
                      <list-item>
                        <p>PCORnet</p>
                      </list-item>
                      <list-item>
                        <p>FHIR</p>
                      </list-item>
                    </list>
                  </td>
                  <td>Medical research CDMs<sup>a</sup> or data standard</td>
                </tr>
                <tr valign="top">
                  <td>C</td>
                  <td>
                    <list list-type="bullet">
                      <list-item>
                        <p>Data quality</p>
                      </list-item>
                      <list-item>
                        <p>Data accuracy</p>
                      </list-item>
                      <list-item>
                        <p>Data utility</p>
                      </list-item>
                      <list-item>
                        <p>Data fidelity</p>
                      </list-item>
                      <list-item>
                        <p>Fitness for use</p>
                      </list-item>
                      <list-item>
                        <p>Fitness for purpose</p>
                      </list-item>
                    </list>
                  </td>
                  <td>Assessment of quality or utility of data</td>
                </tr>
              </tbody>
            </table>
            <table-wrap-foot>
              <fn id="table1fn1">
                <p><sup>a</sup>CDM: common data model.</p>
              </fn>
            </table-wrap-foot>
          </table-wrap>
        </sec>
        <sec>
          <title>Search Query Construction</title>
          <p>Based on the defined key concepts and search terms, we built a search string by combining the domain of formal data anonymization with those of CDM standards and data quality by using corresponding “AND” and “OR” Boolean operators.</p>
          <p>The final search string is built using the following key concept combination:</p>
          <p>
            <disp-formula>Search query = A AND (B OR C)</disp-formula>
          </p>
          <p>The proposed citation search query is documented in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p>
        </sec>
      </sec>
      <sec>
        <title>Step 3—Study Selection</title>
        <p>After the collection of articles meeting the eligibility criteria, a diligent selection process will be followed. This will be based on independent reviews by 2 experts, while a third expert will ensure that a compromise is achieved in case of selection conflicts. Two major stages will constitute this paper selection process.</p>
        <p>First, a general screening review based on the title and abstracts of each article will be carried out in order to exclude all references not useful to achieve the targeted research objective.</p>
        <p>In the second phase, a content review will be conducted via a full-text reading of each remaining citation included, to determine their final eligibility by considering their relevance for responding to the research questions. In addition, we will document and provide a list of all excluded articles in a complementary appendix.</p>
        <p>These 2 phases will be implemented independently by the 2 citation reviewers by using the free web-based application Rayyan [<xref ref-type="bibr" rid="ref36">36</xref>]. This application supports the traceable management of the inputs of the different contributing stakeholders and transparent conflict management [<xref ref-type="bibr" rid="ref36">36</xref>]. Thus, any conflict regarding the final decision about the inclusion or exclusion of a reference will be discussed and decided under consideration of both reviewers’ viewpoints and input from the independent expert; this will be followed by interactive literature explorations within the Rayyan platform in a nonblinded form. Finally, a detailed description of the literature selection process and conflict management will be provided using a PRISMA-ScR (Preferred Reporting Items for Systematic Reviews and Meta-Analyses Extension for Scoping Reviews) flowchart [<xref ref-type="bibr" rid="ref33">33</xref>].</p>
      </sec>
      <sec>
        <title>Step 4—Extracting and Charting the Data</title>
        <p>We will extract from each of the selected articles all relevant information (including metadata) and record these into a template-based documentation, so that a subsequent descriptive analysis (including information visualization) can be performed by using an appropriate statistics package. A general template has been provisionally proposed (see <xref ref-type="table" rid="table2">Table 2</xref>) considering approaches from similar review projects [<xref ref-type="bibr" rid="ref34">34</xref>]. Updates on this template will be iteratively and collaboratively integrated, in accordance with requirements during the review, taking into account the concrete relevance for responding to the research objectives.</p>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Template to extract key information from the included articles.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="500"/>
            <col width="500"/>
            <thead>
              <tr valign="top">
                <td>Metadata</td>
                <td>Description</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Citation details</td>
                <td>Name of first author and coauthors, digital object ID, and journal name</td>
              </tr>
              <tr valign="top">
                <td>Year of publication</td>
                <td>Year of publication of the article in a valid year format (eg, YYYY)</td>
              </tr>
              <tr valign="top">
                <td>Study type</td>
                <td>Use case, framework development, evaluation, etc</td>
              </tr>
              <tr valign="top">
                <td>Study location</td>
                <td>Continent, country, or city hosting the study</td>
              </tr>
              <tr valign="top">
                <td>Institute</td>
                <td>Research institution of the first author</td>
              </tr>
              <tr valign="top">
                <td>Funding source</td>
                <td>Public, industry, or missing</td>
              </tr>
              <tr valign="top">
                <td>Aims of the study</td>
                <td>Objective of the study</td>
              </tr>
              <tr valign="top">
                <td>Methodology (including technical implementation)</td>
                <td>Methods, techniques, models, framework, or approach implemented to achieve the research aims</td>
              </tr>
              <tr valign="top">
                <td>Study populations (if described in the article)</td>
                <td>Targeted research cohort, built on the basis of corresponding eligibility criteria</td>
              </tr>
              <tr valign="top">
                <td>Summary of outcome measures</td>
                <td>Summarizing the study results</td>
              </tr>
              <tr valign="top">
                <td>Limitations or gaps</td>
                <td>Strengths and limitations of the study</td>
              </tr>
              <tr valign="top">
                <td>Important results associated with research question 1</td>
                <td>Description of formal relational data anonymization processes on CDM<sup>a</sup>-converted health care data</td>
              </tr>
              <tr valign="top">
                <td>Important results associated with research question 2</td>
                <td>Description of existing evidence to address anonymous data quality and utility: description of implemented strategies and description of observable gaps</td>
              </tr>
              <tr valign="top">
                <td>Important results associated with research question 3</td>
                <td>Description of differences in CDMs regarding how data anonymization modifies the specified table’s granularity and how anonymized data are recorded therein</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table2fn1">
              <p><sup>a</sup>CDM: common data model.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Step 5—Collating, Summarizing, and Reporting the Results</title>
        <p>We will carry out a narrative quantitative analysis of findings using a 2-way analytical framework [<xref ref-type="bibr" rid="ref33">33</xref>], which will include a descriptive and thematic-based approach. This will generate comprehensive results, outlining the current evidence and research gaps related to the research questions. In doing so, we will first describe the implementation of data anonymization on FHIR- and CDM-formatted data, which include i2b2 TranSMART, OMOP OHDSI, PCORnet, and the CDISC. This will be accompanied by an analysis of the strategies deployed to ensure quality and utility assessment of the anonymous data obtained, in order to present the current state of the art and identify open research aspects. In addition, the effects of data anonymization on CDM specifications will be presented and discussed.</p>
        <p>Furthermore, corresponding comparison tables and graphs (PRISMA-ScR model–oriented) will be presented. Second, the findings will be organized, analyzed, and discussed in accordance with the 3 research questions. A thematically oriented illustration will be additionally generated.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <p>Following the methodological elements, outlined in steps 2 (identifying the relevant studies) and 3 (study selection), we were able to generate a set of search keywords and design an appropriate literature search query. Furthermore, a tentative execution of this query on Web of Science resulted in the detection of 507 matching publications. In alignment with the presented methodology, these articles will be interactively scrutinized by the experts in order to gain relevant information regarding the research questions. This preparatory work will support the transparent execution of this scoping review study. In doing so, we intend to implement the full extraction of the literature and to proceed with the full execution of the review study by the end of the fourth quarter of 2023.</p>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <p>During the planning stage, we designed and implemented a query allowing the identification of potentially eligible publications, in order to investigate the current status of evidence regarding data quality–preserving relational anonymization of CDM-converted health care data. The considerable amount of eligible literature obtained from Web of Science showed that useful information could be found to describe how relational data anonymizations are performed in the context of CDM-transformed health data and to what extent the quality and utility of obtained anonymous data are addressed in consideration of CDM specifications.</p>
      <p>However, a more detailed analysis of these citations should support (1) investigating how the several privacy models, data transformation techniques, and utility models [<xref ref-type="bibr" rid="ref1">1</xref>] are applied on CDM-converted health data, and (2) documenting the findings into the CDM databases. Moreover, the obtained set of literature could cover a wide range of current formal anonymization techniques, technologies related to Extraction-Transformation-Load processes for converting source data to the CDM format, or numerous data quality assessment frameworks. This requires a meticulous literature analysis strategy to include the most pertinent citations, which should enable answering the research questions. By following up on the systematic review of Fernández-Alemán et al [<xref ref-type="bibr" rid="ref37">37</xref>], revealing the necessity of complementary work concerning the security and privacy of EHR data systems, and the investigation by Majeed and Lee [<xref ref-type="bibr" rid="ref1">1</xref>], presenting the quantification of both utility and privacy of anonymized sensitive data for some scenarios as a challenging task, this scoping review should serve as a response to these questions, capture and describe the current evidence about utility-preserving anonymization of tabular CDM-based health data, and help identify potentially existing research gaps. This aspect is adequately in line with some of the main goals for conducting a scoping review as proposed by Arksey and O’Malley [<xref ref-type="bibr" rid="ref33">33</xref>], which are to summarize and disseminate research findings and to identify research gaps in the existing literature.</p>
      <p>Nevertheless, the planned scoping review might include some restrictions. Regarding the scope of the intended literature review, just a focus on formal data privacy models should be addressed, including, for instance, the k-anonymization, <italic>l</italic>-diversity, differential privacy, and <italic>t</italic>-closeness privacy models. Moreover, only the relational (table-based) data anonymization methods should be approached due to their frequent application for anonymizing tabular data in the medical context. A follow-up review including further anonymization frameworks such as social network– or graph-based data anonymization [<xref ref-type="bibr" rid="ref1">1</xref>] in the clinical context could be subsequently initiated. However, to address the four-eyes principle on the proposed literature search string early, we will proceed with the validation of the search query by a librarian from the licensed library of Medical Faculty Mannheim, Heidelberg University, in order to correspondingly mitigate any potential conceptual or technical issues in the query.</p>
      <p>Among other aspects, it is pertinent to point out that the anticipated definition of the study’s specifications is an essential approach for limiting decision conflicts and providing transparency in the completion of this literature review. This should foster a reproducible and transferable methodology and disseminate reliable insights necessary to enhance and to better understand the approaches for preserving patient privacy and data quality in the secondary use of harmonized health care data.</p>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Revised literature search query.</p>
        <media xlink:href="resprot_v12i1e46471_app1.txt" xlink:title="TXT File , 1 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">CDISC</term>
          <def>
            <p>Clinical Data Interchange Standards Consortium</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">CDM</term>
          <def>
            <p>common data model</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">EHR</term>
          <def>
            <p>electronic health record</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">FHIR</term>
          <def>
            <p>Fast Healthcare Interoperability Resources</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">i2b2</term>
          <def>
            <p>Informatics for Integrating Biology &#38; the Bedside</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">MIRACUM</term>
          <def>
            <p>Medical Informatics in Research and Care in University Medicine</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">ODM</term>
          <def>
            <p>Operational Data Model</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">OHDSI</term>
          <def>
            <p>Observational Health Data Sciences and Informatics</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">OMOP</term>
          <def>
            <p>Observational Medical Outcomes Partnership</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb10">PCORnet</term>
          <def>
            <p>Patient-Centered Outcomes Research network</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb11">PRISMA-ScR</term>
          <def>
            <p>Preferred Reporting Items for Systematic Reviews and Meta-Analyses Extension for Scoping Reviews</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>This work is funded by the German Federal Ministry of Education and Research within the German Medical Informatics Initiative (grant 01ZZ1801E; Medical Informatics in Research and Care in University Medicine). The authors would like to thank Kim Hee for critiquing the manuscript. For the publication fee, we acknowledge financial support from Heidelberg University and Deutsche Forschungsgemeinschaft within the “Open Access Publikationskosten” funding program.</p>
    </ack>
    <notes>
      <sec>
        <title>Data Availability</title>
        <p>Scripts for the technical implementation of citation visualization based on statistical software such as RStudio [<xref ref-type="bibr" rid="ref38">38</xref>], as well as files containing the collected literature and other study documents, will be made available on an open publicly available repository such as Zenodo [<xref ref-type="bibr" rid="ref39">39</xref>]. These data will be available in an anonymized format for facilitating more transparency and for offering the possibility to reproduce the literature extraction, charting, and analysis processes.</p>
      </sec>
    </notes>
    <fn-group>
      <fn fn-type="con">
        <p>All authors commented on the draft and approved the final manuscript version.</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Majeed</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Anonymization techniques for privacy preserving data publishing: a comprehensive survey</article-title>
          <source>IEEE Access</source>
          <year>2021</year>
          <volume>9</volume>
          <fpage>8512</fpage>
          <lpage>8545</lpage>
          <pub-id pub-id-type="doi">10.1109/access.2020.3045700</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bönisch</surname>
              <given-names>Caroline</given-names>
            </name>
            <name name-style="western">
              <surname>Kesztyüs</surname>
              <given-names>Dorothea</given-names>
            </name>
            <name name-style="western">
              <surname>Kesztyüs</surname>
              <given-names>Tibor</given-names>
            </name>
          </person-group>
          <article-title>Harvesting metadata in clinical care: a crosswalk between FHIR, OMOP, CDISC and openEHR metadata</article-title>
          <source>Sci Data</source>
          <year>2022</year>
          <month>10</month>
          <day>28</day>
          <volume>9</volume>
          <issue>1</issue>
          <fpage>659</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41597-022-01792-7"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41597-022-01792-7</pub-id>
          <pub-id pub-id-type="medline">36307424</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41597-022-01792-7</pub-id>
          <pub-id pub-id-type="pmcid">PMC9616884</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Voss</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Makadia</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Matcho</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Ma</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Knoll</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Schuemie</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>DeFalco</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Londhe</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Ryan</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Feasibility and utility of applications of the common data model to multiple, disparate observational health databases</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2015</year>
          <month>05</month>
          <volume>22</volume>
          <issue>3</issue>
          <fpage>553</fpage>
          <lpage>564</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/25670757"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamia/ocu023</pub-id>
          <pub-id pub-id-type="medline">25670757</pub-id>
          <pub-id pub-id-type="pii">ocu023</pub-id>
          <pub-id pub-id-type="pmcid">PMC4457111</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Prasser</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Eicher</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Spengler</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Bild</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Kuhn</surname>
              <given-names>KA</given-names>
            </name>
          </person-group>
          <article-title>Flexible data anonymization using ARX—current status and challenges ahead</article-title>
          <source>Softw: Pract Exper</source>
          <year>2020</year>
          <month>02</month>
          <day>25</day>
          <volume>50</volume>
          <issue>7</issue>
          <fpage>1277</fpage>
          <lpage>1304</lpage>
          <pub-id pub-id-type="doi">10.1002/spe.2812</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Haber</surname>
              <given-names>Anna C</given-names>
            </name>
            <name name-style="western">
              <surname>Sax</surname>
              <given-names>Ulrich</given-names>
            </name>
            <name name-style="western">
              <surname>Prasser</surname>
              <given-names>Fabian</given-names>
            </name>
            <collab>NFDI4Health Consortium</collab>
          </person-group>
          <article-title>Open tools for quantitative anonymization of tabular phenotype data: literature review</article-title>
          <source>Brief Bioinform</source>
          <year>2022</year>
          <month>11</month>
          <day>19</day>
          <volume>23</volume>
          <issue>6</issue>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/36215114"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/bib/bbac440</pub-id>
          <pub-id pub-id-type="medline">36215114</pub-id>
          <pub-id pub-id-type="pii">6754758</pub-id>
          <pub-id pub-id-type="pmcid">PMC9677485</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Almeida</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Barraca</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Oliveira</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Preserving privacy when querying OMOP CDM databases</article-title>
          <source>Stud Health Technol Inform</source>
          <year>2022</year>
          <month>08</month>
          <day>31</day>
          <volume>298</volume>
          <fpage>163</fpage>
          <lpage>164</lpage>
          <pub-id pub-id-type="doi">10.3233/SHTI220930</pub-id>
          <pub-id pub-id-type="medline">36073478</pub-id>
          <pub-id pub-id-type="pii">SHTI220930</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sweeney</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>k-Anonymity: a model for protecting privacy</article-title>
          <source>Int J Unc Fuzz Knowl Based Syst</source>
          <year>2012</year>
          <month>05</month>
          <day>02</day>
          <volume>10</volume>
          <issue>05</issue>
          <fpage>557</fpage>
          <lpage>570</lpage>
          <pub-id pub-id-type="doi">10.1142/s0218488502001648</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Machanavajjhala</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Kifer</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Gehrke</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Venkitasubramaniam</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>L-diversity: Privacy beyond k-anonymity</article-title>
          <source>ACM Trans Knowl Discov Data</source>
          <year>2007</year>
          <month>03</month>
          <volume>1</volume>
          <issue>1</issue>
          <fpage>3</fpage>
          <pub-id pub-id-type="doi">10.1145/1217299.1217302</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Li</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Tiancheng</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Suresh</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>t-Closeness: privacy beyond k-anonymity and l-diversity</article-title>
          <year>2007</year>
          <conf-name>2007 IEEE 23rd International Conference on Data Engineering</conf-name>
          <conf-date>April 15-20, 2007</conf-date>
          <conf-loc>Istanbul, Turkey</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dwork</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Differential privacy: a survey of results</article-title>
          <year>2008</year>
          <conf-name>TAMC: Annual Conference on Theory and Applications of Models of Computation</conf-name>
          <conf-date>April 25-29, 2008</conf-date>
          <conf-loc>Xi'an, China</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lei</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Chunxiao</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Jian</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Jian</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Yong</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Information security in big data: privacy and data mining</article-title>
          <source>IEEE Access</source>
          <year>2014</year>
          <volume>2</volume>
          <fpage>1149</fpage>
          <lpage>1176</lpage>
          <pub-id pub-id-type="doi">10.1109/access.2014.2362522</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rahimi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Bateni</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Mohammadinejad</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Extended K-anonymity model for privacy preserving on micro data</article-title>
          <source>IJCNIS</source>
          <year>2015</year>
          <month>11</month>
          <day>08</day>
          <volume>7</volume>
          <issue>12</issue>
          <fpage>42</fpage>
          <lpage>51</lpage>
          <pub-id pub-id-type="doi">10.5815/ijcnis.2015.12.05</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fung</surname>
              <given-names>BCM</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>PS</given-names>
            </name>
          </person-group>
          <article-title>Privacy-preserving data publishing</article-title>
          <source>ACM Comput Surv</source>
          <year>2010</year>
          <month>06</month>
          <day>23</day>
          <volume>42</volume>
          <issue>4</issue>
          <fpage>1</fpage>
          <lpage>53</lpage>
          <pub-id pub-id-type="doi">10.1145/1749603.1749605</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Schmidt</surname>
              <given-names>CO</given-names>
            </name>
            <name name-style="western">
              <surname>Struckmann</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Enzenbach</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Reineke</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Stausberg</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Damerow</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Huebner</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Schmidt</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Sauerbrei</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Richter</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Facilitating harmonized data quality assessments. A data quality framework for observational health research data collections with software implementations in R</article-title>
          <source>BMC Med Res Methodol</source>
          <year>2021</year>
          <month>04</month>
          <day>02</day>
          <volume>21</volume>
          <issue>1</issue>
          <fpage>63</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedresmethodol.biomedcentral.com/articles/10.1186/s12874-021-01252-7"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12874-021-01252-7</pub-id>
          <pub-id pub-id-type="medline">33810787</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12874-021-01252-7</pub-id>
          <pub-id pub-id-type="pmcid">PMC8019177</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kahn</surname>
              <given-names>MG</given-names>
            </name>
            <name name-style="western">
              <surname>Callahan</surname>
              <given-names>TJ</given-names>
            </name>
            <name name-style="western">
              <surname>Barnard</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Bauck</surname>
              <given-names>AE</given-names>
            </name>
            <name name-style="western">
              <surname>Brown</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Davidson</surname>
              <given-names>BN</given-names>
            </name>
            <name name-style="western">
              <surname>Estiri</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Goerg</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Holve</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Johnson</surname>
              <given-names>SG</given-names>
            </name>
            <name name-style="western">
              <surname>Liaw</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Hamilton-Lopez</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Meeker</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Ong</surname>
              <given-names>TC</given-names>
            </name>
            <name name-style="western">
              <surname>Ryan</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Shang</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Weiskopf</surname>
              <given-names>NG</given-names>
            </name>
            <name name-style="western">
              <surname>Weng</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Zozus</surname>
              <given-names>MN</given-names>
            </name>
            <name name-style="western">
              <surname>Schilling</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>A harmonized data quality assessment terminology and framework for the secondary use of electronic health record data</article-title>
          <source>EGEMS (Wash DC)</source>
          <year>2016</year>
          <month>09</month>
          <day>11</day>
          <volume>4</volume>
          <issue>1</issue>
          <fpage>1244</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/27713905"/>
          </comment>
          <pub-id pub-id-type="doi">10.13063/2327-9214.1244</pub-id>
          <pub-id pub-id-type="medline">27713905</pub-id>
          <pub-id pub-id-type="pii">egems1244</pub-id>
          <pub-id pub-id-type="pmcid">PMC5051581</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Weiskopf</surname>
              <given-names>NG</given-names>
            </name>
            <name name-style="western">
              <surname>Bakken</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Hripcsak</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Weng</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>A data quality assessment guideline for electronic health record data reuse</article-title>
          <source>EGEMS (Wash DC)</source>
          <year>2017</year>
          <month>09</month>
          <day>04</day>
          <volume>5</volume>
          <issue>1</issue>
          <fpage>14</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/29881734"/>
          </comment>
          <pub-id pub-id-type="doi">10.5334/egems.218</pub-id>
          <pub-id pub-id-type="medline">29881734</pub-id>
          <pub-id pub-id-type="pmcid">PMC5983018</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Weiskopf</surname>
              <given-names>NG</given-names>
            </name>
            <name name-style="western">
              <surname>Hripcsak</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Swaminathan</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Weng</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Defining and measuring completeness of electronic health records for secondary use</article-title>
          <source>J Biomed Inform</source>
          <year>2013</year>
          <month>10</month>
          <volume>46</volume>
          <issue>5</issue>
          <fpage>830</fpage>
          <lpage>836</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(13)00085-3"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2013.06.010</pub-id>
          <pub-id pub-id-type="medline">23820016</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(13)00085-3</pub-id>
          <pub-id pub-id-type="pmcid">PMC3810243</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fox</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Aggarwal</surname>
              <given-names>VR</given-names>
            </name>
            <name name-style="western">
              <surname>Whelton</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Johnson</surname>
              <given-names>O</given-names>
            </name>
          </person-group>
          <article-title>A data quality framework for process mining of electronic health record data</article-title>
          <year>2018</year>
          <conf-name>2018 IEEE International Conference on Healthcare Informatics (ICHI)</conf-name>
          <conf-date>June 4-7, 2018</conf-date>
          <conf-loc>New York, NY</conf-loc>
          <pub-id pub-id-type="doi">10.1109/ICHI.2018.00009</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ayaz</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Pasha</surname>
              <given-names>MF</given-names>
            </name>
            <name name-style="western">
              <surname>Alzahrani</surname>
              <given-names>MY</given-names>
            </name>
            <name name-style="western">
              <surname>Budiarto</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Stiawan</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>The Fast Health Interoperability Resources (FHIR) standard: systematic literature review of implementations, applications, challenges and opportunities</article-title>
          <source>JMIR Med Inform</source>
          <year>2021</year>
          <month>07</month>
          <day>30</day>
          <volume>9</volume>
          <issue>7</issue>
          <fpage>e21929</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medinform.jmir.org/2021/7/e21929/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/21929</pub-id>
          <pub-id pub-id-type="medline">34328424</pub-id>
          <pub-id pub-id-type="pii">v9i7e21929</pub-id>
          <pub-id pub-id-type="pmcid">PMC8367140</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Klann</surname>
              <given-names>JG</given-names>
            </name>
            <name name-style="western">
              <surname>Abend</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Raghavan</surname>
              <given-names>VA</given-names>
            </name>
            <name name-style="western">
              <surname>Mandl</surname>
              <given-names>KD</given-names>
            </name>
            <name name-style="western">
              <surname>Murphy</surname>
              <given-names>SN</given-names>
            </name>
          </person-group>
          <article-title>Data interchange using i2b2</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2016</year>
          <month>09</month>
          <volume>23</volume>
          <issue>5</issue>
          <fpage>909</fpage>
          <lpage>915</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/26911824"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamia/ocv188</pub-id>
          <pub-id pub-id-type="medline">26911824</pub-id>
          <pub-id pub-id-type="pii">ocv188</pub-id>
          <pub-id pub-id-type="pmcid">PMC4997035</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hripcsak</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Duke</surname>
              <given-names>JD</given-names>
            </name>
            <name name-style="western">
              <surname>Shah</surname>
              <given-names>NH</given-names>
            </name>
            <name name-style="western">
              <surname>Reich</surname>
              <given-names>CG</given-names>
            </name>
            <name name-style="western">
              <surname>Huser</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Schuemie</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Suchard</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Park</surname>
              <given-names>RW</given-names>
            </name>
            <name name-style="western">
              <surname>Wong</surname>
              <given-names>ICK</given-names>
            </name>
            <name name-style="western">
              <surname>Rijnbeek</surname>
              <given-names>PR</given-names>
            </name>
            <name name-style="western">
              <surname>van der Lei</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Pratt</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Norén</surname>
              <given-names>GN</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Stang</surname>
              <given-names>PE</given-names>
            </name>
            <name name-style="western">
              <surname>Madigan</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Ryan</surname>
              <given-names>PB</given-names>
            </name>
          </person-group>
          <article-title>Observational Health Data Sciences and Informatics (OHDSI): opportunities for observational researchers</article-title>
          <source>Stud Health Technol Inform</source>
          <year>2015</year>
          <volume>216</volume>
          <fpage>574</fpage>
          <lpage>578</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/26262116"/>
          </comment>
          <pub-id pub-id-type="medline">26262116</pub-id>
          <pub-id pub-id-type="pmcid">PMC4815923</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fleurence</surname>
              <given-names>RL</given-names>
            </name>
            <name name-style="western">
              <surname>Curtis</surname>
              <given-names>LH</given-names>
            </name>
            <name name-style="western">
              <surname>Califf</surname>
              <given-names>RM</given-names>
            </name>
            <name name-style="western">
              <surname>Platt</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Selby</surname>
              <given-names>JV</given-names>
            </name>
            <name name-style="western">
              <surname>Brown</surname>
              <given-names>JS</given-names>
            </name>
          </person-group>
          <article-title>Launching PCORnet, a national patient-centered clinical research network</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2014</year>
          <month>07</month>
          <day>01</day>
          <volume>21</volume>
          <issue>4</issue>
          <fpage>578</fpage>
          <lpage>582</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/24821743"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/amiajnl-2014-002747</pub-id>
          <pub-id pub-id-type="medline">24821743</pub-id>
          <pub-id pub-id-type="pii">amiajnl-2014-002747</pub-id>
          <pub-id pub-id-type="pmcid">PMC4078292</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hume</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Aerts</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Sarnikar</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Huser</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>Current applications and future directions for the CDISC Operational Data Model standard: A methodological review</article-title>
          <source>J Biomed Inform</source>
          <year>2016</year>
          <month>04</month>
          <volume>60</volume>
          <fpage>352</fpage>
          <lpage>362</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(16)00038-1"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2016.02.016</pub-id>
          <pub-id pub-id-type="medline">26944737</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(16)00038-1</pub-id>
          <pub-id pub-id-type="pmcid">PMC4837012</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Prokosch</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Acker</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Bernarding</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Binder</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Boeker</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Boerries</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Daumke</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Ganslandt</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Hesser</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Höning</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Neumaier</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Marquardt</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Renz</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Rothkötter</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Schade-Brittinger</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Schmücker</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Schüttler</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Sedlmayr</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Serve</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Sohrabi</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Storf</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>MIRACUM: Medical Informatics in Research and Care in University Medicine</article-title>
          <source>Methods Inf Med</source>
          <year>2018</year>
          <month>07</month>
          <day>17</day>
          <volume>57</volume>
          <issue>S 01</issue>
          <fpage>e82</fpage>
          <lpage>e91</lpage>
          <pub-id pub-id-type="doi">10.3414/me17-02-0025</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Maier</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Lang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Storf</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Vormstein</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Bieber</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Bernarding</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Herrmann</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Haverkamp</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Horki</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Laufer</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Berger</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Höning</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Fritsch</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Schüttler</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Ganslandt</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Prokosch</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Sedlmayr</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Towards implementation of OMOP in a German university hospital consortium</article-title>
          <source>Appl Clin Inform</source>
          <year>2018</year>
          <month>01</month>
          <day>24</day>
          <volume>9</volume>
          <issue>1</issue>
          <fpage>54</fpage>
          <lpage>61</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.thieme-connect.com/DOI/DOI?10.1055/s-0037-1617452"/>
          </comment>
          <pub-id pub-id-type="doi">10.1055/s-0037-1617452</pub-id>
          <pub-id pub-id-type="medline">29365340</pub-id>
          <pub-id pub-id-type="pmcid">PMC5801887</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gruendner</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Gulden</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Kampf</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Mate</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Prokosch</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Zierk</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>A framework for criteria-based selection and processing of Fast Healthcare Interoperability Resources (FHIR) data for statistical analysis: design and implementation study</article-title>
          <source>JMIR Med Inform</source>
          <year>2021</year>
          <month>04</month>
          <day>01</day>
          <volume>9</volume>
          <issue>4</issue>
          <fpage>e25645</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medinform.jmir.org/2021/4/e25645/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/25645</pub-id>
          <pub-id pub-id-type="medline">33792554</pub-id>
          <pub-id pub-id-type="pii">v9i4e25645</pub-id>
          <pub-id pub-id-type="pmcid">PMC8050750</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pitoglou</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Filntisi</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Anastasiou</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Matsopoulos</surname>
              <given-names>GK</given-names>
            </name>
            <name name-style="western">
              <surname>Koutsouris</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Measuring the impact of anonymization on real-world consolidated health datasets engineered for secondary research use: Experiments in the context of MODELHealth project</article-title>
          <source>Front Digit Health</source>
          <year>2022</year>
          <month>09</month>
          <day>01</day>
          <volume>4</volume>
          <fpage>841853</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/36120716"/>
          </comment>
          <pub-id pub-id-type="doi">10.3389/fdgth.2022.841853</pub-id>
          <pub-id pub-id-type="medline">36120716</pub-id>
          <pub-id pub-id-type="pmcid">PMC9474677</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Langarizadeh</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Orooji</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Sheikhtaheri</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Effectiveness of anonymization methods in preserving patients' privacy: a systematic literature review</article-title>
          <source>Stud Health Technol Inform</source>
          <year>2018</year>
          <volume>248</volume>
          <fpage>80</fpage>
          <lpage>87</lpage>
          <pub-id pub-id-type="medline">29726422</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ferrão</surname>
              <given-names>Maria Eugénia</given-names>
            </name>
            <name name-style="western">
              <surname>Prata</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Fazendeiro</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Utility-driven assessment of anonymized data via clustering</article-title>
          <source>Sci Data</source>
          <year>2022</year>
          <month>07</month>
          <day>30</day>
          <volume>9</volume>
          <issue>1</issue>
          <fpage>456</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41597-022-01561-6"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41597-022-01561-6</pub-id>
          <pub-id pub-id-type="medline">35907927</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41597-022-01561-6</pub-id>
          <pub-id pub-id-type="pmcid">PMC9339002</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bassion</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>The Clinical Data Interchange Standards Consortium Laboratory Model: standardizing laboratory data interchange in clinical trials</article-title>
          <source>Drug Information J</source>
          <year>2003</year>
          <month>12</month>
          <day>30</day>
          <volume>37</volume>
          <issue>3</issue>
          <fpage>271</fpage>
          <lpage>281</lpage>
          <pub-id pub-id-type="doi">10.1177/009286150303700303</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Garza</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Del Fiol</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Tenenbaum</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Walden</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Zozus</surname>
              <given-names>MN</given-names>
            </name>
          </person-group>
          <article-title>Evaluating common data models for use with a longitudinal community registry</article-title>
          <source>J Biomed Inform</source>
          <year>2016</year>
          <month>12</month>
          <volume>64</volume>
          <fpage>333</fpage>
          <lpage>341</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(16)30153-8"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2016.10.016</pub-id>
          <pub-id pub-id-type="medline">27989817</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(16)30153-8</pub-id>
          <pub-id pub-id-type="pmcid">PMC6810649</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kapsner</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Kampf</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Seuchter</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kamdje-Wabo</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Gradinger</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Ganslandt</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Mate</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Gruendner</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Kraska</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Prokosch</surname>
              <given-names>Hans-Ulrich</given-names>
            </name>
          </person-group>
          <article-title>Moving towards an EHR data quality framework: the MIRACUM approach</article-title>
          <source>Stud Health Technol Inform</source>
          <year>2019</year>
          <month>09</month>
          <day>03</day>
          <volume>267</volume>
          <fpage>247</fpage>
          <lpage>253</lpage>
          <pub-id pub-id-type="doi">10.3233/SHTI190834</pub-id>
          <pub-id pub-id-type="medline">31483279</pub-id>
          <pub-id pub-id-type="pii">SHTI190834</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Arksey</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>O'Malley</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Scoping studies: towards a methodological framework</article-title>
          <source>Int J Soc Res Methodol</source>
          <year>2005</year>
          <month>02</month>
          <volume>8</volume>
          <issue>1</issue>
          <fpage>19</fpage>
          <lpage>32</lpage>
          <pub-id pub-id-type="doi">10.1080/1364557032000119616</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gierend</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Krüger</surname>
              <given-names>Frank</given-names>
            </name>
            <name name-style="western">
              <surname>Waltemath</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Fünfgeld</surname>
              <given-names>Maximilian</given-names>
            </name>
            <name name-style="western">
              <surname>Ganslandt</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Zeleke</surname>
              <given-names>AA</given-names>
            </name>
          </person-group>
          <article-title>Approaches and criteria for provenance in biomedical data sets and workflows: protocol for a scoping review</article-title>
          <source>JMIR Res Protoc</source>
          <year>2021</year>
          <month>11</month>
          <day>22</day>
          <volume>10</volume>
          <issue>11</issue>
          <fpage>e31750</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.researchprotocols.org/2021/11/e31750/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/31750</pub-id>
          <pub-id pub-id-type="medline">34813494</pub-id>
          <pub-id pub-id-type="pii">v10i11e31750</pub-id>
          <pub-id pub-id-type="pmcid">PMC8663663</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>HE</given-names>
            </name>
            <name name-style="western">
              <surname>Cosa-Linan</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Santhanam</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Jannesari</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Maros</surname>
              <given-names>ME</given-names>
            </name>
            <name name-style="western">
              <surname>Ganslandt</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Transfer learning for medical image classification: a literature review</article-title>
          <source>BMC Med Imaging</source>
          <year>2022</year>
          <month>04</month>
          <day>13</day>
          <volume>22</volume>
          <issue>1</issue>
          <fpage>69</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedimaging.biomedcentral.com/articles/10.1186/s12880-022-00793-7"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12880-022-00793-7</pub-id>
          <pub-id pub-id-type="medline">35418051</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12880-022-00793-7</pub-id>
          <pub-id pub-id-type="pmcid">PMC9007400</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kellermeyer</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Harnke</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Knight</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Covidence and Rayyan</article-title>
          <source>J Med Libr Assoc</source>
          <year>2018</year>
          <month>10</month>
          <day>04</day>
          <volume>106</volume>
          <issue>4</issue>
          <fpage>580</fpage>
          <lpage>583</lpage>
          <pub-id pub-id-type="doi">10.5195/jmla.2018.513</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fernández-Alemán</surname>
              <given-names>José Luis</given-names>
            </name>
            <name name-style="western">
              <surname>Señor</surname>
              <given-names>Inmaculada Carrión</given-names>
            </name>
            <name name-style="western">
              <surname>Lozoya</surname>
              <given-names>Pedro Ángel Oliver</given-names>
            </name>
            <name name-style="western">
              <surname>Toval</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Security and privacy in electronic health records: a systematic literature review</article-title>
          <source>J Biomed Inform</source>
          <year>2013</year>
          <month>06</month>
          <volume>46</volume>
          <issue>3</issue>
          <fpage>541</fpage>
          <lpage>562</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(12)00186-4"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2012.12.003</pub-id>
          <pub-id pub-id-type="medline">23305810</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(12)00186-4</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gandrud</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <source>Reproducible Research with R and RStudio</source>
          <year>2018</year>
          <publisher-loc>New York, NY</publisher-loc>
          <publisher-name>Chapman and Hall/CRC</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sicilia</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>García-Barriocanal</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Sánchez-Alonso</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Community curation in open dataset repositories: insights from Zenodo</article-title>
          <source>Procedia Comput Sci</source>
          <year>2017</year>
          <volume>106</volume>
          <fpage>54</fpage>
          <lpage>60</lpage>
          <pub-id pub-id-type="doi">10.1016/j.procs.2017.03.009</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
