<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">ResProt</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Res Protoc</journal-id>
      <journal-title>JMIR Research Protocols</journal-title>
      <issn pub-type="epub">1929-0748</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v10i11e31750</article-id>
      <article-id pub-id-type="pmid">34813494</article-id>
      <article-id pub-id-type="doi">10.2196/31750</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Protocol</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Protocol</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Approaches and Criteria for Provenance in Biomedical Data Sets and Workflows: Protocol for a Scoping Review</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Eysenbach</surname>
            <given-names>Gunther</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Curcin</surname>
            <given-names>Vasa</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Miksa</surname>
            <given-names>Tomasz</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Gierend</surname>
            <given-names>Kerstin</given-names>
          </name>
          <degrees>Dipl Inf (FH)</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Department of Biomedical Informatics at the Center for Preventive Medicine and Digital Health</institution>
            <institution>Medical Faculty Mannheim</institution>
            <institution>Heidelberg University</institution>
            <addr-line>Theodor-Kutzer-Ufer 1-3</addr-line>
            <addr-line>Mannheim, 68167</addr-line>
            <country>Germany</country>
            <phone>49 0621 383 ext 8087</phone>
            <email>kerstin.gierend@medma.uni-heidelberg.de</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-0417-3454</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Krüger</surname>
            <given-names>Frank</given-names>
          </name>
          <degrees>Dr Ing</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-7925-3363</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Waltemath</surname>
            <given-names>Dagmar</given-names>
          </name>
          <degrees>Prof Dr Ing</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-5886-5563</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Fünfgeld</surname>
            <given-names>Maximilian</given-names>
          </name>
          <degrees>Dr rer nat</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-6720-9795</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Ganslandt</surname>
            <given-names>Thomas</given-names>
          </name>
          <degrees>Prof Dr med</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-6864-8936</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author">
          <name name-style="western">
            <surname>Zeleke</surname>
            <given-names>Atinkut Alamirrew</given-names>
          </name>
          <degrees>Dr rer medic</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-7838-9050</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Department of Biomedical Informatics at the Center for Preventive Medicine and Digital Health</institution>
        <institution>Medical Faculty Mannheim</institution>
        <institution>Heidelberg University</institution>
        <addr-line>Mannheim</addr-line>
        <country>Germany</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Department of Communications Engineering</institution>
        <institution>University of Rostock</institution>
        <addr-line>Rostock</addr-line>
        <country>Germany</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Department of Medical Informatics, Institute for Community Medicine</institution>
        <institution>University Medicine Greifswald</institution>
        <addr-line>Greifswald</addr-line>
        <country>Germany</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Kerstin Gierend <email>kerstin.gierend@medma.uni-heidelberg.de</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>11</month>
        <year>2021</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>22</day>
        <month>11</month>
        <year>2021</year>
      </pub-date>
      <volume>10</volume>
      <issue>11</issue>
      <elocation-id>e31750</elocation-id>
      <history>
        <date date-type="received">
          <day>2</day>
          <month>7</month>
          <year>2021</year>
        </date>
        <date date-type="rev-request">
          <day>11</day>
          <month>8</month>
          <year>2021</year>
        </date>
        <date date-type="rev-recd">
          <day>6</day>
          <month>9</month>
          <year>2021</year>
        </date>
        <date date-type="accepted">
          <day>7</day>
          <month>9</month>
          <year>2021</year>
        </date>
      </history>
      <copyright-statement>©Kerstin Gierend, Frank Krüger, Dagmar Waltemath, Maximilian Fünfgeld, Thomas Ganslandt, Atinkut Alamirrew Zeleke. Originally published in JMIR Research Protocols (https://www.researchprotocols.org), 22.11.2021.</copyright-statement>
      <copyright-year>2021</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Research Protocols, is properly cited. The complete bibliographic information, a link to the original publication on https://www.researchprotocols.org, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://www.researchprotocols.org/2021/11/e31750" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Provenance supports the understanding of data genesis, and it is a key factor to ensure the trustworthiness of digital objects containing (sensitive) scientific data. Provenance information contributes to a better understanding of scientific results and fosters collaboration on existing data as well as data sharing. This encompasses defining comprehensive concepts and standards for transparency and traceability, reproducibility, validity, and quality assurance during clinical and scientific data workflows and research.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>The aim of this scoping review is to investigate existing evidence regarding approaches and criteria for provenance tracking as well as disclosing current knowledge gaps in the biomedical domain. This review covers modeling aspects as well as metadata frameworks for meaningful and usable provenance information during creation, collection, and processing of (sensitive) scientific biomedical data. This review also covers the examination of quality aspects of provenance criteria.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>This scoping review will follow the methodological framework by Arksey and O'Malley. Relevant publications will be obtained by querying PubMed and Web of Science. All papers in the English language published between January 1, 2006 and March 23, 2021 will be included. Data retrieval will be accompanied by a manual search for grey literature. Potential publications will then be exported into reference management software, and duplicates will be removed. Afterwards, the obtained set of papers will be transferred into a systematic review management tool. All publications will be screened, extracted, and analyzed: title and abstract screening will be carried out by 4 independent reviewers. Majority vote is required for consent to eligibility of papers based on the defined inclusion and exclusion criteria. Full-text reading will be performed independently by 2 reviewers and in the last step, key information will be extracted on a pretested template. If agreement cannot be reached, the conflict will be resolved by a domain expert. Charted data will be analyzed by categorizing and summarizing the individual data items based on the research questions. Tabular or graphical overviews will be given, if applicable.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>The reporting follows the extension of the Preferred Reporting Items for Systematic reviews and Meta-Analyses statements for Scoping Reviews. Electronic database searches in PubMed and Web of Science resulted in 469 matches after deduplication. As of September 2021, the scoping review is in the full-text screening stage. The data extraction using the pretested charting template will follow the full-text screening stage. We expect the scoping review report to be completed by February 2022.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>Information about the origin of healthcare data has a major impact on the quality and the reusability of scientific results as well as follow-up activities. This protocol outlines plans for a scoping review that will provide information about current approaches, challenges, or knowledge gaps with provenance tracking in biomedical sciences.</p>
        </sec>
        <sec sec-type="registered-report">
          <title>International Registered Report Identifier (IRRID)</title>
          <p>DERR1-10.2196/31750</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>provenance</kwd>
        <kwd>biomedical</kwd>
        <kwd>workflow</kwd>
        <kwd>data sharing</kwd>
        <kwd>lineage</kwd>
        <kwd>scoping review</kwd>
        <kwd>data genesis</kwd>
        <kwd>scientific data</kwd>
        <kwd>digital objects</kwd>
        <kwd>healthcare data</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>The (re-)use of electronic medical and patient-related data offers enormous potential for further investigations in clinical research [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref2">2</xref>]. Different national initiatives such as the French Health Data Hub initiative or the German Medical Informatics Initiatives are committed to better knowledge discovery and data sharing in the health care domain [<xref ref-type="bibr" rid="ref3">3</xref>]. Resulting outcomes give patients and physicians safe and rapid access to therapies or treatment options. Subsequently, treatment costs can be reduced. In this context, access to quality-assured, traceable, and hence, credible shared data is essential. Providing information about the origin of data demands concepts for traceability to gain an understanding of the relationships between results and source data. There is an increasing interest and need to ensure traceability throughout scientific practice. Consequently, a systematic knowledge compilation regarding provenance and potential gaps is needed.</p>
      <p>Provenance describes the origin of data. A basic understanding of the term “provenance” is given with the description “what happened” to the data [<xref ref-type="bibr" rid="ref4">4</xref>]. Several different models exist to formally express provenance information, for instance, the World Wide Web Consortium PROV standard or CWLProv [<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref6">6</xref>]. Advantages and opportunities of providing data provenance have been demonstrated, for instance, from the experiences in the EU-Horizon 2020 TRANSFoRm project [<xref ref-type="bibr" rid="ref4">4</xref>]. Moreover, the importance of provenance and the relation to provenance within electronic health records is pointed out in the study of Johnson et al [<xref ref-type="bibr" rid="ref7">7</xref>]. A previously published systematic review of provenance systems already investigated tools and systems [<xref ref-type="bibr" rid="ref8">8</xref>]. However, our own work aims to understand current approaches and criteria as well as knowledge gaps for provenance in biomedical as well as domain-independent research.</p>
      <p>The fields of research data management and FAIR (findable-accessible-interoperable-reusable) data principles consider provenance as one of the research pillars [<xref ref-type="bibr" rid="ref9">9</xref>]. As such, a provenance-oriented approach requires thorough planning, execution, and evaluation of data management processes in the respective application domain [<xref ref-type="bibr" rid="ref1">1</xref>]. While capturing provenance information in the research, adherence to criteria such as consistency, interoperability, and confidentiality is required across all software tools [<xref ref-type="bibr" rid="ref2">2</xref>]. Furthermore, data privacy issues have to be respected during modeling to keep compliance with national and international requirements such as the European General Data Protection Regulation [<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref11">11</xref>].</p>
      <p>Process quality with the associated workflow quality can be achieved by monitoring and troubleshooting in applications or in data integration scenarios such as Extract-Transform-Load jobs. This implies workflow requirements to be established on a fine- or coarse-grained provenance level for troubleshooting [<xref ref-type="bibr" rid="ref12">12</xref>]. Addressing data quality issues should help to achieve completeness, accuracy, and timeliness of the data and to create trust in them. However, heterogeneous data sources, dynamic infrastructures, data exchange across boundaries, and lack of standards for quality measures characterize the current state of electronic health record data sets [<xref ref-type="bibr" rid="ref13">13</xref>]. Contrarily, provenance information strengthens the credibility of the data and proves that data have not been intentionally or unintentionally changed in their life cycle [<xref ref-type="bibr" rid="ref14">14</xref>]. The concept and implementation of provenance is essential in most scientific domains such as environmental fields (geoprocessing workflows or climate assessments), in fusion engineering, or material sciences [<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref16">16</xref>]. Since the use of machine learning techniques within the scope of decision support is becoming increasingly popular for medical researchers, they are under the obligation to prove their reproducibility [<xref ref-type="bibr" rid="ref17">17</xref>]. Therefore, systematic knowledge about the “what happened” and about reproducibility metrics such as data sets and code accessibility is indispensable and is in need of further investigation to provide provenance [<xref ref-type="bibr" rid="ref18">18</xref>].</p>
      <p>The aim of this scoping review is to investigate existing evidence regarding approaches and criteria for provenance tracking as well as disclosing current knowledge gaps in the biomedical domain. This comprises modeling aspects as well as metadata frameworks for meaningful and usable provenance information during creation, collection, and processing of (sensitive) scientific biomedical data. The review also covers the examination of quality aspects of provenance criteria.</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Design</title>
        <p>The individual elements from the framework of Arksey and O’Malley [<xref ref-type="bibr" rid="ref19">19</xref>] will be used as a roadmap for this scoping review. Essential methodological steps will cover the stages (1) identification of the research questions, (2) identification of relevant studies, (3) study selection, (4) data extraction and charting, and (5) collating, summarizing, and reporting the results. Any subsequent deviations of the final report from the scoping review protocol will be clearly highlighted and explained in the scoping review report.</p>
      </sec>
      <sec>
        <title>Ethics</title>
        <p>Ethical approval was not required because only literature will be evaluated without processing sensitive patient data.</p>
      </sec>
      <sec>
        <title>Stage 1: Identification of the Research Questions</title>
        <p>At first, an informal prescreening of relevant literature in PubMed and Web of Science as well as grey literature from conferences or organizations was carried out to determine the keywords in scope. Relevant literature was identified with the support of a librarian. PubMed was searched using the keywords “provenance” and “tracking.” The reviewer team explored, studied, and scrutinized additional literature based on search combinations of terms linked to the topic “provenance.” Ten publications were selected and reviewed by the team in an iterative process to guide the implementation of the research questions. During this step, keywords from titles and abstracts were gathered and analyzed by implementing the search strategy based on them. The following research questions were generated to meet the objective of this scoping review before study conduction: to investigate existing evidence regarding approaches and criteria for provenance tracking as well as disclosing current knowledge gaps in the biomedical domain. This review covers modeling aspects as well as metadata frameworks for meaningful and usable provenance information during creation, collection, and processing of (sensitive) scientific biomedical data. This review also covers the examination of quality aspects of provenance criteria.</p>
        <p>Research question 1: Which potential (methodological) approaches exist for the classification and tracking of provenance criteria and methods in a biomedical or domain-independent context?</p>
        <p>Research question 2: How can the potential value of provenance information be harnessed and by whom? How can usability be provided?</p>
        <p>Research question 3: What are the challenges and potential problems or bottlenecks for the accomplishment of provenance?</p>
        <p>Research question 4: Which guidelines or demands for the consideration of provenance criteria in a biomedical or domain-independent context have to be followed?</p>
        <p>Research question 5: How completely can provenance be mapped in the data lifecycle or during data management?</p>
      </sec>
      <sec>
        <title>Stage 2: Identification of Relevant Studies</title>
        <p>Relevant publications will be retrieved using concepts together with their associated keywords as selected from “Stage 1: Identification of the research questions.” Concepts are categorized into 4 groups: target domain, provenance, provenance properties, and objective. Target domain refers to the context of the research topic and includes studies with a biomedical, health care, clinical, or scientific background. Scientific background is limited to domain-independent studies and excludes all other domain-specific studies. The concept “provenance” concerns the information about the genesis of a given object while the concept “provenance properties” covers specific requirements tied to the term “provenance” or describes selected characteristics in this context. The concept “objective” embraces the range of purpose or the intention of provenance. <xref ref-type="table" rid="table1">Table 1</xref> provides an overview of the eligibility criteria derived from the categorization of the concepts together with the defined terms and their matching keywords.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Concepts and matching keywords (eligibility criteria).</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="180"/>
            <col width="820"/>
            <thead>
              <tr valign="top">
                <td>Concepts</td>
                <td>Matching keywords (inclusion criteria)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Target domain</td>
                <td>biomed*<sup>a</sup>, EHR, electronic health record, healthcare, clinical, scientific<sup>b</sup></td>
              </tr>
              <tr valign="top">
                <td>Provenance</td>
                <td>provenance, prov, lineage</td>
              </tr>
              <tr valign="top">
                <td>Provenance properties</td>
                <td>interop*, (data NEAR/2 [flow, quality, transformation]), metadata, workflow, semantic, framework, annotat*, ontolog*, management, document*, (model NEAR/2 provenance)</td>
              </tr>
              <tr valign="top">
                <td>Objective</td>
                <td>audit*, decision support, ETL, Extract-Transform-Load, FHIR, record linking, machine learning, reproducib*, transparen*, track*, implement*</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>The * symbol (wildcard character) replaces or represents one or more characters.</p>
            </fn>
            <fn id="table1fn2">
              <p><sup>b</sup>Will be used in a domain-independent context only.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <p>A comprehensive search strategy for identifying the relevant literature, based on the given table, was implemented in PubMed and Web of Science. Medical subject headings were applied in PubMed. Additionally, the Boolean operators AND and OR were used within the search strategy for combining the individual concepts and their associated keywords.</p>
        <p>The inclusion criteria comprised all papers in the English language and published between January 1, 2006 and March 23, 2021. The concepts and their related keywords, as shown in <xref ref-type="table" rid="table1">Table 1</xref>, are considered during the selection of the papers within the biomedical or domain-independent area. The start date for inclusion of literature was chosen owing to the initiation of the Open Provenance Model in 2006 as a result of the Provenance Challenge series [<xref ref-type="bibr" rid="ref20">20</xref>]. Grey literature from relevant project reports and proceedings was searched and reviewed for eligibility. All search results were exported to a reference management tool to eliminate duplications. Unique results were exported to the web-based screening tool Rayyan (Qatar Computing Research Institute) [<xref ref-type="bibr" rid="ref21">21</xref>]. The PRISMA-ScR (Preferred Reporting Items for Systematic reviews and Meta-analyses extension for Scoping Reviews) will be used for reporting of this scoping review [<xref ref-type="bibr" rid="ref22">22</xref>].</p>
      </sec>
      <sec>
        <title>Stage 3: Study Selection</title>
        <p>During the scoping review process, decisions to select or eliminate studies are tracked using Rayyan. That way, independent screening by the reviewers is enabled. Rayyan allows citation sharing and blinded comparison of decisions for inclusion and exclusion of selected studies. All imported publications will be screened by reading the title and abstract by all 4 reviewers. Title-abstract screening is the process of reviewing the references for inclusion based solely upon their title and abstract. Reviewers will screen out irrelevant references whereby the inclusion and exclusion criteria serve as the basis for their eligibility decision. Conflicts will be resolved since at least 3 unified classifications are necessary for inclusion or exclusion of a publication in an unblinded mode. The included (=eligible) publications will be examined in a full-text screening phase to determine the extent to which they can answer the research questions. Each publication must be read by 2 researchers to determine the relevance to the research questions. If there is no joint agreement, an independent researcher will be consulted. A description and a PRISMA flow chart of the selection process with frequencies for references considered in the different databases will be provided as well as counting in the subsequent title-abstract screening process based on the eligibility criteria.</p>
      </sec>
      <sec>
        <title>Stage 4: Data Extraction and Charting</title>
        <p>The data collection process will be documented by the reviewers while using the collectively developed template as provided in <xref ref-type="table" rid="table2">Table 2</xref>. The approach to data extraction needs to be consistent with the research question and purpose. This charting form will be pretested and will be used after close alignment between the reviewers. “Pretested” means that 2 reviewers will independently complete the template for 5 studies ahead of the main study. They will compare the result with regard to a consistent approach and agree on updates to the template, if necessary. Reviewers will diligently extract and update the study data from the identified papers in scope during their full-text review in an iterative process.</p>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Data charting template for key information from eligible papers.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="380"/>
            <col width="620"/>
            <thead>
              <tr valign="top">
                <td>Metadata publication</td>
                <td>Characteristic extraction and specification</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Title<sup>a</sup></td>
                <td>Title</td>
              </tr>
              <tr valign="top">
                <td>Citation details<sup>a</sup></td>
                <td>Author (1st), journal, DOI</td>
              </tr>
              <tr valign="top">
                <td>Year of publication<sup>a</sup></td>
                <td>For example, YYYY</td>
              </tr>
              <tr valign="top">
                <td>Publication type<sup>a</sup></td>
                <td>Journal or website or conference, etc</td>
              </tr>
              <tr valign="top">
                <td>Study type<sup>a</sup></td>
                <td>Use case or development or evaluation</td>
              </tr>
              <tr valign="top">
                <td>Continent of study</td>
                <td>For example, Australia</td>
              </tr>
              <tr valign="top">
                <td>Institute<sup>a</sup></td>
                <td>Contributing institute (corresponding author or—if not provided—1st author)</td>
              </tr>
              <tr valign="top">
                <td>Corresponding author’s discipline</td>
                <td>For example, data architect</td>
              </tr>
              <tr valign="top">
                <td>Funding source</td>
                <td>Public or industry or none or missing</td>
              </tr>
              <tr valign="top">
                <td>Objective<sup>a</sup></td>
                <td>Aim of the publication</td>
              </tr>
              <tr valign="top">
                <td>Methods</td>
                <td>Strategies, processes, or techniques utilized in the collection or analysis of data; how the validity of the study is judged</td>
              </tr>
              <tr valign="top">
                <td>Summary results<sup>a</sup></td>
                <td>Short description of results</td>
              </tr>
              <tr valign="top">
                <td>Conclusion</td>
                <td>Short description of conclusion</td>
              </tr>
              <tr valign="top">
                <td>Target domain<sup>a</sup></td>
                <td>Name specific domain or domain independent</td>
              </tr>
              <tr valign="top">
                <td>Keywords</td>
                <td>List keywords from abstract</td>
              </tr>
              <tr valign="top">
                <td>Metadata to key findings related to research questions</td>
                <td>Characteristic extraction and specification</td>
              </tr>
              <tr valign="top">
                <td>Research question 1: Approaches for classification and tracking of provenance criteria and methods in biomedical or domain-independent context</td>
                <td>Provide description in the domain for data suitability or data availability and other requirements or factors on data or systems regarding the trace of the data history (eg, role of provenance in terms of domain standards, ie, interoperability standards, FAIR [findable-accessible-interoperable-reusable] data, relation to metadata and model use, representation formalisms, etc), check definition of provenance</td>
              </tr>
              <tr valign="top">
                <td>Research question 2: Potential value of provenance information</td>
                <td>Provide possible use case description and types of data sources included, usability including effect on target domain and by whom it can be used and who will be the stakeholders;<break/>problems, if provenance is not available</td>
              </tr>
              <tr valign="top">
                <td>Research question 3: Potential problems or bottlenecks for the accomplishment of provenance</td>
                <td>Describe any challenges (eg, legal, organizational, or technical conditions) or problems that occurred during implementation phase of provenance</td>
              </tr>
              <tr valign="top">
                <td>Research question 4: Guidelines or demands for the consideration of provenance to be adhered to</td>
                <td>Describe any valid domain standard requirement, for example, legal, guidelines, rules</td>
              </tr>
              <tr valign="top">
                <td>Research question 5: Completeness of provenance information during data management process or data life cycle</td>
                <td>Describe any measurement or outcome available for completeness of provenance information</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table2fn1">
              <p><sup>a</sup>Obligatory input.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Stage 5: Collating, Summarizing, and Reporting the Results</title>
        <p>The charting results from stage 4 will be presented in the following steps [<xref ref-type="bibr" rid="ref19">19</xref>]. Analysis will be given by a qualitative evaluation and by summary statistics, charts, or equivalent appraisal. The reporting of the results and outcome will be aligned to the research questions. The meaning of the findings and their relation to the overall objectives will be discussed. Implications for future research, practice, and policy will be outlined. The reporting of the results will be aligned with the PRISMA-ScR reporting guidelines [<xref ref-type="bibr" rid="ref22">22</xref>].</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Schedule</title>
        <p>The scoping review started with a tentative search of the PubMed and Web of Science databases in early 2021 (see stages 1-3) and resulted in 469 matches. These papers will be subjected to title-abstract screening in an interactive selection process for eligibility, followed by a full-text screening stage. These papers will be examined within an iterative selection process for inclusion into data charting (see stage 4). Data extraction will be finalized during the 4th quarter of 2021. The scoping review will be completed by summarizing and synthesizing the results by February 2022 (see stage 5).</p>
      </sec>
      <sec>
        <title>Anticipated Outcomes</title>
        <p>The scoping review will identify potentially relevant initiatives on provenance, and it will provide an overview of the evidence, gaps, and limitations for provenance criteria. All the evidence will be elaborated on the basis of the research questions. As such, the review can serve as preparatory work for achieving a comprehensive usable result on approaches and criteria for provenance. Based on the review results, the quality of the provenance criteria will be examined for a potential demarcation regarding minimum requirements for structuredness and completeness of provenance. We believe that this investigation supports provenance research with respect to the implementation of provenance in secondary use projects such as the German Medical Informatics Initiative. Within the Medical Informatics in Research and Care in University Medicine consortium, as part of the Medical Informatics Initiative, provenance has an important meaning to bioinformaticians and researchers [<xref ref-type="bibr" rid="ref23">23</xref>].</p>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <p>Implications for future work will be derived from the current status of research activities and their underlying concepts. We anticipate that implications will encompass conceptual and modeling approaches up to the generation of provenance-aware data as well as gaps in the current practices within the health care domain. We believe that our results will support the further development of guidelines, thereby overcoming the identified challenges and disclosing new opportunities for the classification and tracking of provenance criteria. Evidence will assist in recognizing and defining the preconditions for data sharing. It will further characterize data suitability and categories (eg, data governance, relevance, quality) at a fitness-for-purpose level in the health domain, considering the interests of different stakeholders. Finally, the scoping review will provide insights into whether a further assessment of the results is useful within a full systematic review.</p>
    </sec>
  </body>
  <back>
    <app-group/>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">PRISMA-ScR</term>
          <def>
            <p>Preferred Reporting Items for Systematic reviews and Meta-Analyses extension for Scoping Reviews</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>This research is funded by the German Federal Ministry of Education and Research within the German Medical Informatics Initiative with the grant 01ZZ1801E (Medical Informatics in Research and Care in University Medicine), by the Deutsche Forschungsgemeinschaft (DFG, German Research Foundation) SFB 1270/1-99150580, and by the National Research Data Infrastructure for Personal Health Data (NFDI4Health) DFG-funded project (Project 442326535).</p>
    </ack>
    <fn-group>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jayapandian</surname>
              <given-names>CP</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Ewing</surname>
              <given-names>RM</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Sahoo</surname>
              <given-names>SS</given-names>
            </name>
          </person-group>
          <article-title>A semantic proteomics dashboard (SemPoD) for data management in translational research</article-title>
          <source>BMC Syst Biol</source>
          <year>2012</year>
          <volume>6 Suppl 3</volume>
          <fpage>S20</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.biomedcentral.com/1752-0509/6/S3/S20"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/1752-0509-6-S3-S20</pub-id>
          <pub-id pub-id-type="medline">23282161</pub-id>
          <pub-id pub-id-type="pii">1752-0509-6-S3-S20</pub-id>
          <pub-id pub-id-type="pmcid">PMC3524316</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Curcin</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Miles</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Danger</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Bache</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Taweel</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Implementing interoperable provenance in biomedical research</article-title>
          <source>Future Generation Computer Systems</source>
          <year>2014</year>
          <month>05</month>
          <volume>34</volume>
          <fpage>1</fpage>
          <lpage>16</lpage>
          <pub-id pub-id-type="doi">10.1016/j.future.2013.12.001</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cuggia</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Combes</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>The French Health Data Hub and the German Medical Informatics Initiatives: Two national projects to promote data sharing in healthcare</article-title>
          <source>Yearb Med Inform</source>
          <year>2019</year>
          <month>08</month>
          <volume>28</volume>
          <issue>1</issue>
          <fpage>195</fpage>
          <lpage>202</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.thieme-connect.com/DOI/DOI?10.1055/s-0039-1677917"/>
          </comment>
          <pub-id pub-id-type="doi">10.1055/s-0039-1677917</pub-id>
          <pub-id pub-id-type="medline">31419832</pub-id>
          <pub-id pub-id-type="pmcid">PMC6697511</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Curcin</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>Embedding data provenance into the Learning Health System to facilitate reproducible research</article-title>
          <source>Learn Health Syst</source>
          <year>2017</year>
          <month>04</month>
          <volume>1</volume>
          <issue>2</issue>
          <fpage>e10019</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1002/lrh2.10019"/>
          </comment>
          <pub-id pub-id-type="doi">10.1002/lrh2.10019</pub-id>
          <pub-id pub-id-type="medline">31245557</pub-id>
          <pub-id pub-id-type="pii">LRH210019</pub-id>
          <pub-id pub-id-type="pmcid">PMC6516719</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Groth</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Moreau</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>PROV-overview</article-title>
          <source>W3C</source>
          <access-date>2021-06-10</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.w3.org/TR/prov-overview/">https://www.w3.org/TR/prov-overview/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Khan</surname>
              <given-names>Farah Zaib</given-names>
            </name>
            <name name-style="western">
              <surname>Soiland-Reyes</surname>
              <given-names>Stian</given-names>
            </name>
            <name name-style="western">
              <surname>Sinnott</surname>
              <given-names>Richard O</given-names>
            </name>
            <name name-style="western">
              <surname>Lonie</surname>
              <given-names>Andrew</given-names>
            </name>
            <name name-style="western">
              <surname>Goble</surname>
              <given-names>Carole</given-names>
            </name>
            <name name-style="western">
              <surname>Crusoe</surname>
              <given-names>Michael R</given-names>
            </name>
          </person-group>
          <article-title>Sharing interoperable workflow provenance: A review of best practices and their practical application in CWLProv</article-title>
          <source>Gigascience</source>
          <year>2019</year>
          <month>11</month>
          <day>01</day>
          <volume>8</volume>
          <issue>11</issue>
          <fpage>1</fpage>
          <lpage>27</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://academic.oup.com/gigascience/article-lookup/doi/10.1093/gigascience/giz095"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/gigascience/giz095</pub-id>
          <pub-id pub-id-type="medline">31675414</pub-id>
          <pub-id pub-id-type="pii">5611001</pub-id>
          <pub-id pub-id-type="pmcid">PMC6824458</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Johnson</surname>
              <given-names>KE</given-names>
            </name>
            <name name-style="western">
              <surname>Kamineni</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Fuller</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Olmstead</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Wernli</surname>
              <given-names>KJ</given-names>
            </name>
          </person-group>
          <article-title>How the provenance of electronic health record data matters for research: a case example using system mapping</article-title>
          <source>EGEMS (Wash DC)</source>
          <year>2014</year>
          <volume>2</volume>
          <issue>1</issue>
          <fpage>1058</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/25821838"/>
          </comment>
          <pub-id pub-id-type="doi">10.13063/2327-9214.1058</pub-id>
          <pub-id pub-id-type="medline">25821838</pub-id>
          <pub-id pub-id-type="pii">egems1058</pub-id>
          <pub-id pub-id-type="pmcid">PMC4371416</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pérez</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Rubio</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Sáenz-Adán</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>A systematic review of provenance systems</article-title>
          <source>Knowl Inf Syst</source>
          <year>2018</year>
          <month>02</month>
          <day>17</day>
          <volume>57</volume>
          <issue>3</issue>
          <fpage>495</fpage>
          <lpage>543</lpage>
          <pub-id pub-id-type="doi">10.1007/s10115-018-1164-3</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jauer</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Deserno</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Data provenance standards and recommendations for FAIR data</article-title>
          <source>Stud Health Technol Inform</source>
          <year>2020</year>
          <month>06</month>
          <day>16</day>
          <volume>270</volume>
          <fpage>1237</fpage>
          <lpage>1238</lpage>
          <pub-id pub-id-type="doi">10.3233/SHTI200380</pub-id>
          <pub-id pub-id-type="medline">32570597</pub-id>
          <pub-id pub-id-type="pii">SHTI200380</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hume</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Sarnikar</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Noteboom</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Enhancing traceability in clinical research data through a metadata framework</article-title>
          <source>Methods Inf Med</source>
          <year>2020</year>
          <month>05</month>
          <volume>59</volume>
          <issue>2-03</issue>
          <fpage>75</fpage>
          <lpage>85</lpage>
          <pub-id pub-id-type="doi">10.1055/s-0040-1714393</pub-id>
          <pub-id pub-id-type="medline">32894879</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sahoo</surname>
              <given-names>SS</given-names>
            </name>
            <name name-style="western">
              <surname>Nguyen</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Bodenreider</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Parikh</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Minning</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Sheth</surname>
              <given-names>AP</given-names>
            </name>
          </person-group>
          <article-title>A unified framework for managing provenance information in translational research</article-title>
          <source>BMC Bioinformatics</source>
          <year>2011</year>
          <month>11</month>
          <day>29</day>
          <volume>12</volume>
          <fpage>461</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-12-461"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/1471-2105-12-461</pub-id>
          <pub-id pub-id-type="medline">22126369</pub-id>
          <pub-id pub-id-type="pii">1471-2105-12-461</pub-id>
          <pub-id pub-id-type="pmcid">PMC3298549</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zheng</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Alawini</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Ives</surname>
              <given-names>Z</given-names>
            </name>
          </person-group>
          <year>2019</year>
          <month>04</month>
          <conf-name>35th International Conference on Data Engineering (ICDE)</conf-name>
          <conf-date>2019</conf-date>
          <conf-loc>Macao, China</conf-loc>
          <fpage>184</fpage>
          <lpage>195</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/31595143"/>
          </comment>
          <pub-id pub-id-type="doi">10.1109/ICDE.2019.00025</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Margheri</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Masi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Miladi</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Sassone</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Rosenzweig</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Decentralised provenance for healthcare data</article-title>
          <source>Int J Med Inform</source>
          <year>2020</year>
          <month>09</month>
          <volume>141</volume>
          <fpage>104197</fpage>
          <pub-id pub-id-type="doi">10.1016/j.ijmedinf.2020.104197</pub-id>
          <pub-id pub-id-type="medline">32540775</pub-id>
          <pub-id pub-id-type="pii">S1386-5056(19)31203-1</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wing</surname>
              <given-names>JM</given-names>
            </name>
          </person-group>
          <article-title>The data life cycle</article-title>
          <source>Harvard Data Science Review</source>
          <year>2019</year>
          <month>06</month>
          <day>23</day>
          <fpage>1</fpage>
          <lpage>6</lpage>
          <pub-id pub-id-type="doi">10.1162/99608f92.e26845b4</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Schissel</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Abla</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Flanagan</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Greenwald</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Romosan</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Shoshani</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Stillerman</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wright</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Automated metadata, provenance cataloging and navigable interfaces: Ensuring the usefulness of extreme-scale data</article-title>
          <source>Fusion Engineering and Design</source>
          <year>2014</year>
          <month>05</month>
          <volume>89</volume>
          <issue>5</issue>
          <fpage>745</fpage>
          <lpage>749</lpage>
          <pub-id pub-id-type="doi">10.1016/j.fusengdes.2014.01.053</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yakutovich</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Eimre</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Schütt</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Talirz</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Adorf</surname>
              <given-names>CS</given-names>
            </name>
            <name name-style="western">
              <surname>Andersen</surname>
              <given-names>CW</given-names>
            </name>
            <name name-style="western">
              <surname>Ditler</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Du</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Passerone</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Smit</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Marzari</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Pizzi</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Pignedoli</surname>
              <given-names>CA</given-names>
            </name>
          </person-group>
          <article-title>AiiDAlab – an ecosystem for developing, executing, and sharing scientific workflows</article-title>
          <source>Computational Materials Science</source>
          <year>2021</year>
          <month>02</month>
          <volume>188</volume>
          <fpage>110165</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1016/j.commatsci.2020.110165"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.commatsci.2020.110165</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Samuel</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Löffler</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>König-Ries</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Machine learning pipelines: provenance, reproducibility and FAIR data principles</article-title>
          <source>arXiv.org</source>
          <access-date>2021-05-16</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://arxiv.org/abs/2006.12117">http://arxiv.org/abs/2006.12117</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>McDermott</surname>
              <given-names>MBA</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Marinsek</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Ranganath</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Foschini</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Ghassemi</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Reproducibility in machine learning for health research: Still a ways to go</article-title>
          <source>Sci Transl Med</source>
          <year>2021</year>
          <month>03</month>
          <day>24</day>
          <volume>13</volume>
          <issue>586</issue>
          <fpage>eabb1655</fpage>
          <pub-id pub-id-type="doi">10.1126/scitranslmed.abb1655</pub-id>
          <pub-id pub-id-type="medline">33762434</pub-id>
          <pub-id pub-id-type="pii">13/586/eabb1655</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Arksey</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>O'Malley</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Scoping studies: towards a methodological framework</article-title>
          <source>International Journal of Social Research Methodology</source>
          <year>2005</year>
          <month>02</month>
          <volume>8</volume>
          <issue>1</issue>
          <fpage>19</fpage>
          <lpage>32</lpage>
          <pub-id pub-id-type="doi">10.1080/1364557032000119616</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Moreau</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Ludäscher</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Altintas</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Barga</surname>
              <given-names>RS</given-names>
            </name>
            <name name-style="western">
              <surname>Bowers</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Callahan</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Chin</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Clifford</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Cohen</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Cohen-Boulakia</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Davidson</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Deelman</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Digiampietri</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Foster</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Freire</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Frew</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Futrelle</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Gibson</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Gil</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Goble</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Golbeck</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Groth</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Holland</surname>
              <given-names>DA</given-names>
            </name>
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Koop</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Krenek</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>McPhillips</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Mehta</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Miles</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Metzger</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Munroe</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Myers</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Plale</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Podhorszki</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Ratnakar</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Santos</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Scheidegger</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Schuchardt</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Seltzer</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Simmhan</surname>
              <given-names>YL</given-names>
            </name>
            <name name-style="western">
              <surname>Silva</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Slaughter</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Stephan</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Stevens</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Turi</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Vo</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Wilde</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Special issue: the first Provenance Challenge</article-title>
          <source>Concurrency Computat.: Pract. Exper</source>
          <year>2008</year>
          <month>04</month>
          <day>10</day>
          <volume>20</volume>
          <issue>5</issue>
          <fpage>409</fpage>
          <lpage>418</lpage>
          <pub-id pub-id-type="doi">10.1002/cpe.1233</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ouzzani</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Hammady</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Fedorowicz</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Elmagarmid</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Rayyan—a web and mobile app for systematic reviews</article-title>
          <source>Syst Rev</source>
          <year>2016</year>
          <month>12</month>
          <day>05</day>
          <volume>5</volume>
          <issue>1</issue>
          <fpage>210</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://systematicreviewsjournal.biomedcentral.com/articles/10.1186/s13643-016-0384-4"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s13643-016-0384-4</pub-id>
          <pub-id pub-id-type="medline">27919275</pub-id>
          <pub-id pub-id-type="pii">10.1186/s13643-016-0384-4</pub-id>
          <pub-id pub-id-type="pmcid">PMC5139140</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tricco</surname>
              <given-names>AC</given-names>
            </name>
            <name name-style="western">
              <surname>Lillie</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Zarin</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>O'Brien</surname>
              <given-names>KK</given-names>
            </name>
            <name name-style="western">
              <surname>Colquhoun</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Levac</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Moher</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Peters</surname>
              <given-names>MD</given-names>
            </name>
            <name name-style="western">
              <surname>Horsley</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Weeks</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Hempel</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Akl</surname>
              <given-names>EA</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>McGowan</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Stewart</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Hartling</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Aldcroft</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Wilson</surname>
              <given-names>MG</given-names>
            </name>
            <name name-style="western">
              <surname>Garritty</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Lewin</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Godfrey</surname>
              <given-names>CM</given-names>
            </name>
            <name name-style="western">
              <surname>Macdonald</surname>
              <given-names>MT</given-names>
            </name>
            <name name-style="western">
              <surname>Langlois</surname>
              <given-names>EV</given-names>
            </name>
            <name name-style="western">
              <surname>Soares-Weiser</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Moriarty</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Clifford</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Tunçalp</surname>
              <given-names>Özge</given-names>
            </name>
            <name name-style="western">
              <surname>Straus</surname>
              <given-names>SE</given-names>
            </name>
          </person-group>
          <article-title>PRISMA extension for Scoping Reviews (PRISMA-ScR): checklist and explanation</article-title>
          <source>Ann Intern Med</source>
          <year>2018</year>
          <month>10</month>
          <day>02</day>
          <volume>169</volume>
          <issue>7</issue>
          <fpage>467</fpage>
          <lpage>473</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.acpjournals.org/doi/abs/10.7326/M18-0850?url_ver=Z39.88-2003&#38;rfr_id=ori:rid:crossref.org&#38;rfr_dat=cr_pub%3dpubmed"/>
          </comment>
          <pub-id pub-id-type="doi">10.7326/M18-0850</pub-id>
          <pub-id pub-id-type="medline">30178033</pub-id>
          <pub-id pub-id-type="pii">2700389</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pugliese</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Knell</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Christoph</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Exchange of clinical and omics data according to FAIR principles: a review of open source solutions</article-title>
          <source>Methods Inf Med</source>
          <year>2020</year>
          <month>06</month>
          <volume>59</volume>
          <issue>S 01</issue>
          <fpage>e13</fpage>
          <lpage>e20</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.thieme-connect.com/DOI/DOI?10.1055/s-0040-1712968"/>
          </comment>
          <pub-id pub-id-type="doi">10.1055/s-0040-1712968</pub-id>
          <pub-id pub-id-type="medline">32620018</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
