<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">ResProt</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Res Protoc</journal-id>
      <journal-title>JMIR Research Protocols</journal-title>
      <issn pub-type="epub">1929-0748</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v14i1e58567</article-id>
      <article-id pub-id-type="pmid">40262134</article-id>
      <article-id pub-id-type="doi">10.2196/58567</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Protocol</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Protocol</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Patient-Related Metadata Reported in Sequencing Studies of SARS-CoV-2: Protocol for a Scoping Review and Bibliometric Analysis</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Leung</surname>
            <given-names>Tiffany</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Moreira</surname>
            <given-names>Maria Teresa</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Lokala</surname>
            <given-names>Usha</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author">
          <name name-style="western">
            <surname>O'Connor</surname>
            <given-names>Karen</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-7709-3813</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Weissenbacher</surname>
            <given-names>Davy</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-8331-3675</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Elyaderani</surname>
            <given-names>Amir</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-8639-8640</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Lautenbach</surname>
            <given-names>Ebbing</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff4" ref-type="aff">4</xref>
          <xref rid="aff5" ref-type="aff">5</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-4772-2409</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Scotch</surname>
            <given-names>Matthew</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <xref rid="aff6" ref-type="aff">6</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-5100-9724</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Gonzalez-Hernandez</surname>
            <given-names>Graciela</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <address>
            <institution>Department of Computational Biomedicine</institution>
            <institution>Cedars-Sinai Medical Center</institution>
            <addr-line>700 N. San Vicente Blvd</addr-line>
            <addr-line>Pacific Design Center Suite G549F</addr-line>
            <addr-line>Los Angeles, CA, 90069</addr-line>
            <country>United States</country>
            <phone>1 310 423 3521</phone>
            <email>graciela.gonzalezhernandez@cshs.org</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-6416-9556</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Department of Biostatistics, Epidemiology, and Informatics</institution>
        <institution>Perelman School of Medicine</institution>
        <institution>University of Pennsylvania</institution>
        <addr-line>Philadelphia, PA</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Department of Computational Biomedicine</institution>
        <institution>Cedars-Sinai Medical Center</institution>
        <addr-line>Los Angeles, CA</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Biodesign Center for Environmental Health Engineering</institution>
        <institution>Arizona State University</institution>
        <addr-line>Tempe, AZ</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff4">
        <label>4</label>
        <institution>Division of Infectious Diseases</institution>
        <institution>Department of Medicine</institution>
        <institution>University of Pennsylvania</institution>
        <addr-line>Philadelphia, PA</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff5">
        <label>5</label>
        <institution>Center for Clinical Epidemiology and Biostatistics</institution>
        <institution>Perelman School of Medicine</institution>
        <institution>University of Pennsylvania</institution>
        <addr-line>Philadelphia, PA</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff6">
        <label>6</label>
        <institution>College of Health Solutions</institution>
        <institution>Arizona State University</institution>
        <addr-line>Tempe, AZ</addr-line>
        <country>United States</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Graciela Gonzalez-Hernandez <email>graciela.gonzalezhernandez@cshs.org</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <year>2025</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>22</day>
        <month>4</month>
        <year>2025</year>
      </pub-date>
      <volume>14</volume>
      <elocation-id>e58567</elocation-id>
      <history>
        <date date-type="received">
          <day>19</day>
          <month>3</month>
          <year>2024</year>
        </date>
        <date date-type="rev-request">
          <day>17</day>
          <month>7</month>
          <year>2024</year>
        </date>
        <date date-type="rev-recd">
          <day>30</day>
          <month>9</month>
          <year>2024</year>
        </date>
        <date date-type="accepted">
          <day>27</day>
          <month>11</month>
          <year>2024</year>
        </date>
      </history>
      <copyright-statement>©Karen O'Connor, Davy Weissenbacher, Amir Elyaderani, Ebbing Lautenbach, Matthew Scotch, Graciela Gonzalez-Hernandez. Originally published in JMIR Research Protocols (https://www.researchprotocols.org), 22.04.2025.</copyright-statement>
      <copyright-year>2025</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Research Protocols, is properly cited. The complete bibliographic information, a link to the original publication on https://www.researchprotocols.org, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://www.researchprotocols.org/2025/1/e58567" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>There has been an unprecedented effort to sequence the SARS-CoV-2 virus and examine its molecular evolution. This has been facilitated by the availability of publicly accessible databases, such as the GISAID (Global Initiative on Sharing All Influenza Data) and GenBank, which collectively hold millions of SARS-CoV-2 sequence records. Genomic epidemiology, however, seeks to go beyond phylogenetic (the study of evolutionary relationships among biological entities) analysis by linking genetic information to patient characteristics and disease outcomes, enabling a comprehensive understanding of transmission dynamics and disease impact. While these repositories include fields reflecting patient-related metadata for a given sequence, the inclusion of these demographic and clinical details is scarce. The current understanding of patient-related metadata in published sequencing studies and its quality remains unexplored.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>Our review aims to quantitatively assess the extent and quality of patient-related metadata in papers reporting original whole genome sequencing of the SARS-CoV-2 virus and analyze publication patterns using bibliometric analysis. Finally, we will evaluate the efficacy and reliability of a machine learning classifier in accurately identifying relevant papers for inclusion in the scoping review.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>The National Institutes of Health’s LitCovid collection will be used for the automated classification of papers reporting having deposited SARS-CoV-2 sequences in public repositories, while an independent search will be conducted in MEDLINE and PubMed Central for validation. Data extraction will be conducted using Covidence (Veritas Health Innovation Ltd). The extracted data will be synthesized and summarized to quantify the availability of patient metadata in the published literature of SARS-CoV-2 sequencing studies. For the bibliometric analysis, relevant data points, such as author affiliations, citation metrics, author keywords, and Medical Subject Headings terms will be extracted.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>This study is expected to be completed in early 2025. Our classification model has been developed and we have classified publications in LitCovid published through February 2023. As of September 2024, papers through August 2024 are being prepared for processing. Screening is underway for validated papers from the classifier. Direct literature searches and screening of the results began in October 2024. We will summarize and narratively describe our findings using tables, graphs, and charts where applicable.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>This scoping review will report findings on the extent and types of patient-related metadata reported in genomic viral sequencing studies of SARS-CoV-2, identify gaps in the reporting of patient metadata, and make recommendations for improving the quality and consistency of reporting in this area. The bibliometric analysis will uncover trends and patterns in the reporting of patient-related metadata, including differences in reporting based on study types or geographic regions. The insights gained from this study may help improve the quality and consistency of reporting patient metadata, enhancing the utility of sequence metadata and facilitating future research on infectious diseases.</p>
        </sec>
        <sec sec-type="trial registration">
          <title>Trial Registration</title>
          <p>OSF Registries osf.io/wrh95; https://doi.org/10.17605/OSF.IO/WRH95</p>
        </sec>
        <sec sec-type="registered-report">
          <title>International Registered Report Identifier (IRRID)</title>
          <p>DERR1-10.2196/58567</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>SARS-CoV-2</kwd>
        <kwd>COVID-19</kwd>
        <kwd>genomic epidemiology</kwd>
        <kwd>GISAID</kwd>
        <kwd>GenBank</kwd>
        <kwd>sequence records</kwd>
        <kwd>patient-related metadata</kwd>
        <kwd>scoping review</kwd>
        <kwd>protocol</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <sec>
        <title>Background</title>
        <p>Since the onset of the COVID-19 pandemic, there has been an unprecedented effort in genomic epidemiology (genomic epidemiology links pathogen genomes with associated metadata to understand disease transmission) to sequence the virus, study its transmission, and examine molecular evolution. Public repositories, such as the GISAID (Global Initiative on Sharing All Influenza Data) [<xref ref-type="bibr" rid="ref1">1</xref>] and the National Center for Biotechnology Information (NCBI)’s GenBank [<xref ref-type="bibr" rid="ref2">2</xref>], host millions of SARS-CoV-2 sequence records. As of September 2024, GISAID contains 16.9 million sequences, while over 8.9 million have been deposited in GenBank.</p>
        <p>The availability of this vast amount of genomic data has facilitated significant discoveries, particularly in phylogenetic (the study of evolutionary relationships among biological entities) and phylodynamic (the reconstruction of epidemiological and immunological processes from the shape of phylogenetic tree relating infections) studies [<xref ref-type="bibr" rid="ref3">3</xref>-<xref ref-type="bibr" rid="ref5">5</xref>]. Beyond phylogenetic studies, genomic epidemiology aims to understand the transmission dynamics, evolution, and impact of infectious diseases by analyzing the genetic information of pathogens and linking it to patient demographics and disease outcomes [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref7">7</xref>]. This work enables the tracking of the spread of pathogens, identifying high-risk populations, and discovering genetic factors that influence disease transmission, severity, and treatment response [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref8">8</xref>]. This knowledge can, in turn, inform public health strategies, guide the development of targeted interventions, and improve the overall understanding of infectious diseases [<xref ref-type="bibr" rid="ref9">9</xref>].</p>
        <p>Ideally, patient geographic, demographic, and clinical information (such as disease severity and outcome) would be included in the sequence metadata upon its submission to the repository. Both GISAID and GenBank frequently provide the location of the infected host information in their sequence metadata; however, the reported location granularity may vary and often lacks important details such as patient travel history. Similarly, patient demographic and clinical information is rarely complete. A review of available metadata in these 2 large public repositories for SARS-CoV-2 sequences, conducted by the authors in April 2023, found 58.34% (8,943,721/15,329,810) of sequences in GISAID do not include the specific age and 58.58% (8,980,046/15,329,810) do not include the specific gender of the infected host. The information for these may be entered as unknown (eg, “not available,” “declined,” “not reported”). GenBank lacks standardized fields to include age or gender information with sequence submissions.</p>
        <p>Several studies have highlighted the importance and challenges of metadata reporting in SARS-CoV-2 research and identified several shortcomings in the metadata that accompany these sequences [<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref11">11</xref>], particularly deficiencies in the completeness and standardization of the reported data. Proposals have been made for the standardization of this data, but they have not been widely adopted [<xref ref-type="bibr" rid="ref12">12</xref>]. Another review highlighted the importance of patient-related metadata for genomic epidemiology in general but provided no assessment of the availability of these data [<xref ref-type="bibr" rid="ref13">13</xref>]. These studies collectively emphasize the critical need for improved metadata reporting practices, but they do not provide a comprehensive analysis of patient-related metadata reporting specifically in SARS-CoV-2 sequencing studies across multiple repositories or publications such as what we propose.</p>
        <p>Previous research has found that sequence metadata can be enhanced for the location of the infected host using natural language processing and machine learning methods to automatically extract and link this information to the sequence record [<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref15">15</xref>]. This patient-related information, or at least a subset of it, may be reported in the published studies of those who obtained and performed the genomic sequencing, allowing these methods to be extended and applied to SARS-CoV-2 sequences. However, the extent to which patient-related geographic information, such as their residence or travel history, is reported in SARS-CoV-2 sequencing studies remains largely unexplored. The same is true of patient demographics and other clinical information. Our review aims to bridge this gap in understanding by quantifying the extent and types of patient-related metadata reported in published genomic viral sequencing studies of SARS-CoV-2.</p>
        <p>Traditionally, identifying studies for a review requires the development of a detailed search strategy of databases using keywords and index terms, querying the titles and abstracts of published papers. The selection of keywords greatly influences search results, leading to potentially missed studies and the inclusion of potentially irrelevant studies. Moreover, for the particular focus of our study, discussions of sequencing are often confined to the methods section of papers, rendering title and abstract screening less informative. While more than 437,000 research papers [<xref ref-type="bibr" rid="ref16">16</xref>] related to SARS-CoV-2 and the pandemic have been published, there is sparse linkage between the sequence and publication databases. This makes it difficult to identify publications relevant to the sequences, and severely limits meta-analyses and scaling studies by using datasets produced by different investigators. To overcome these limitations, we propose using an automated classifier to identify relevant studies for review. In addition, we will use a traditional database search to validate and compare the approaches.</p>
        <p>A bibliometric analysis uses different methods and data points to quantify the trends and assess the impact of publications in a specific field [<xref ref-type="bibr" rid="ref17">17</xref>]. While several bibliometric analyses have investigated COVID-19–related research trends in general [<xref ref-type="bibr" rid="ref18">18</xref>-<xref ref-type="bibr" rid="ref20">20</xref>] and in specific fields such as neurology [<xref ref-type="bibr" rid="ref21">21</xref>], long COVID [<xref ref-type="bibr" rid="ref22">22</xref>], and medical imaging [<xref ref-type="bibr" rid="ref23">23</xref>], or for specific geographic locations such as Africa [<xref ref-type="bibr" rid="ref24">24</xref>], no bibliometric analysis has specifically focused on reporting patterns of patient metadata in sequencing studies related to the SARS-CoV-2 genome or examined how reporting practices evolved throughout the pandemic. We hypothesize that using bibliometric indicators, differences in metadata reporting will be seen based on study type, institution, and size, with smaller, clinical-based studies reporting more information than larger, surveillance studies.</p>
        <p>Our aims with this review and analysis are to address the gaps in the understanding of the extent and quantity of patient-related metadata reporting in genomic sequencing studies by providing a comprehensive assessment of this reporting in the published SARS-CoV-2 sequencing studies. Using bibliometric methods, we will systematically examine factors that may influence metadata reporting in publications associated with SARS-CoV-2 sequence reporting over the course of the pandemic. By combining detailed content analysis of patient metadata with bibliometric analysis, we can identify factors that influence reporting practices, such as journal or institutional policies, international collaborations, or study types as well as highlight the gaps in reporting that may hinder the advancement of genomic epidemiology studies of the COVID-19 pandemic.</p>
      </sec>
      <sec>
        <title>Primary Research Objectives</title>
        <p>The primary research objectives are the following: (1) To quantitatively assess the extent and quality of patient-related metadata, including demographic, clinical, and geographic information, in papers reporting original whole genome sequencing of the SARS-CoV-2 virus. (2) To perform a comprehensive bibliometric analysis to ascertain differences and discernible patterns between papers that include patient metadata and those that do not, thereby providing insights into the characteristics and factors associated with the reporting of patient data in the literature. (3) To evaluate the efficacy and reliability of a machine learning classifier in accurately identifying relevant papers for inclusion in the scoping review, enhancing the efficiency and effectiveness of this study’s selection process.</p>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Study Design</title>
        <p>Our scoping review will follow the methodological framework identified by Arksey and O’Malley [<xref ref-type="bibr" rid="ref25">25</xref>] and will be reported in line with the PRISMA-ScR (Preferred Reporting Items for Systematic Reviews and Meta-Analyses extension for Scoping Reviews) checklist [<xref ref-type="bibr" rid="ref26">26</xref>] (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>).</p>
      </sec>
      <sec>
        <title>Data Sources</title>
        <p>We will use the National Institutes of Health’s LitCovid collection [<xref ref-type="bibr" rid="ref16">16</xref>] for our machine learning classification. LitCovid is a curated collection of scholarly papers related to COVID-19. As of November 2024, the collection contains over 437,000 publications from 8000 journals and is updated daily. LitCovid includes published papers as well as preprints. Additionally, we will independently search Ovid MEDLINE and PubMed Central directly using a 2-faceted search strategy and the NCBI e-utilities program to find publications linked to sequences. This combined approach will help ensure a comprehensive coverage of the literature for our study.</p>
      </sec>
      <sec>
        <title>Search Strategy</title>
        <sec>
          <title>Classification Model</title>
          <p>The details of our classification model have been previously reported [<xref ref-type="bibr" rid="ref27">27</xref>]. Briefly, our classification model was trained using manually annotated data. A full-text search strategy was developed to filter the LitCovid collection resulting in a corpus of targeted papers for annotation. The papers identified through the pipeline were annotated by 2 experienced annotators using the INCEpTION annotation tool [<xref ref-type="bibr" rid="ref28">28</xref>] and following methodically created annotation guidelines. The annotators reviewed the full text of 245 randomly selected papers and labeled sentences, which confirmed this study’s performance of SARS-CoV-2 sample sequencing from human specimens. The interannotator agreement for the annotation was measured using Cohen κ. The score for agreement on whether the paper reported original viral sequencing was 1, and sentence agreement, which was calculated on papers that reported sequencing (n=74), was moderate [<xref ref-type="bibr" rid="ref29">29</xref>] (κ=0.71). Disagreements were resolved by a third annotator. The final annotated corpus consisted of 50,918 sentences from 245 papers. There were 74 papers that reported SARS-CoV-2 sequencing and, within these papers, 347 sentences were annotated as positive. We split our annotated dataset into 3 random sets: a training set of 147 papers (31,885 sentences), a validation set of 49 papers (9017 sentences), and a test set of 49 papers (10,016 sentences). For our classifier, we pretrained a transformer-based neural network, specifically a bert-base-uncased [<xref ref-type="bibr" rid="ref30">30</xref>] model from the Hugging Face library. On the held-out test set, the classifier achieved an <italic>F</italic><sub>1</sub>-score of 0.48 (precision=0.492 and recall=0.469) for identifying sentences that provided evidence of generating new SARS-CoV-2 sequences. While the classifier achieved moderate performance at the sentence level, assessing the performance at the paper level, meaning at least 1 sentence in the paper that indicated sequencing was detected, the classifier achieved a more robust performance of <italic>F</italic><sub>1</sub>-score of 0.8 (precision=0.667 and recall=1).</p>
        </sec>
        <sec>
          <title>Database Search Strategy</title>
          <p>To evaluate our classifier and identify studies that may have been missed due to classification errors or the lack of full text in the LitCovid collection, we will create a search strategy to independently search MEDLINE and PubMed Central. We will develop a 2-faceted search strategy to find “SARS-CoV-2” and “whole genome sequencing” related publications. We will use the search strategy developed for the LitCovid collection with additional keywords added to identify studies that report whole genome sequencing. A sample search strategy is found in <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>. Additionally, we will search for publications linked to SARS-CoV-2 sequences using the NCBI’s e-utilities eLink application programming interface. We will also search gray literature sources, such as Google Scholar, and review the reference lists of included studies [<xref ref-type="bibr" rid="ref31">31</xref>].</p>
          <p>A publication date restriction of December 2019 onward will be used in the searches as this review is focused on SARS-CoV-2 sequencing studies. No language restrictions will be placed on the searches, although financial and logistical restraints will not allow translation from all languages.</p>
        </sec>
        <sec>
          <title>Inclusion or Exclusion Criteria</title>
          <p>Papers positively identified by our classifier and our search results will be reviewed for inclusion in the review based on the criteria outlined in <xref ref-type="table" rid="table1">Table 1</xref>.</p>
          <table-wrap position="float" id="table1">
            <label>Table 1</label>
            <caption>
              <p>Inclusion and exclusion criteria for the scoping review.</p>
            </caption>
            <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
              <col width="200"/>
              <col width="310"/>
              <col width="490"/>
              <thead>
                <tr valign="top">
                  <td>Facet</td>
                  <td>Inclusion criteria</td>
                  <td>Exclusion criteria</td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td>Sample origin</td>
                  <td>
                    <list list-type="bullet">
                      <list-item>
                        <p>Individual human subject</p>
                      </list-item>
                    </list>
                  </td>
                  <td>
                    <list list-type="bullet">
                      <list-item>
                        <p>Nonhuman sources (eg, mice, bats, and ferrets)</p>
                      </list-item>
                      <list-item>
                        <p>Wastewater</p>
                      </list-item>
                      <list-item>
                        <p>Microbiome</p>
                      </list-item>
                      <list-item>
                        <p>Cloned or cell culture virus</p>
                      </list-item>
                    </list>
                  </td>
                </tr>
                <tr valign="top">
                  <td>Sequencing type</td>
                  <td>
                    <list list-type="bullet">
                      <list-item>
                        <p>Whole genome sequencing, including partial or complete sequence results</p>
                      </list-item>
                    </list>
                  </td>
                  <td>Studies will be excluded if the following sequencing methods were exclusively performed:<break/><list list-type="bullet"><list-item><p>Polymerase chain reaction or loop-mediated isothermal amplification for viral detection</p></list-item><list-item><p>Single-cell sequencing</p></list-item><list-item><p>Gene expression studies</p></list-item><list-item><p>Protocol validation studies on cell culture virus</p></list-item><list-item><p>Exome sequencing</p></list-item></list></td>
                </tr>
                <tr valign="top">
                  <td>Study design</td>
                  <td>
                    <list list-type="bullet">
                      <list-item>
                        <p>Any type of peer-reviewed or preprint study reporting on the original sequencing of SARS-CoV-2 samples.</p>
                      </list-item>
                      <list-item>
                        <p>The study reports the deposit of the sequences into a data repository</p>
                      </list-item>
                    </list>
                  </td>
                  <td>
                    <list list-type="bullet">
                      <list-item>
                        <p>Any other study design</p>
                      </list-item>
                      <list-item>
                        <p>Any study that does not report the depositing of sequences into a data repository</p>
                      </list-item>
                    </list>
                  </td>
                </tr>
                <tr valign="top">
                  <td>Publication dates</td>
                  <td>
                    <list list-type="bullet">
                      <list-item>
                        <p>December 2019 or later</p>
                      </list-item>
                    </list>
                  </td>
                  <td>
                    <list list-type="bullet">
                      <list-item>
                        <p>Before December 2019</p>
                      </list-item>
                    </list>
                  </td>
                </tr>
                <tr valign="top">
                  <td>Language</td>
                  <td>
                    <list list-type="bullet">
                      <list-item>
                        <p>All</p>
                      </list-item>
                    </list>
                  </td>
                  <td>
                    <list list-type="bullet">
                      <list-item>
                        <p>None</p>
                      </list-item>
                    </list>
                  </td>
                </tr>
              </tbody>
            </table>
          </table-wrap>
        </sec>
        <sec>
          <title>Screening and Paper Selection</title>
          <p>Two reviewers will perform title and abstract screening using the Covidence systematic review management tool, with any disagreements resolved by discussion. We will screen the papers from the different methods in a systematic order (<xref rid="figure1" ref-type="fig">Figure 1</xref>). First, we will validate and screen the results from our classifier’s predictions on the LitCovid collection. Next, we will screen the papers obtained from our database searches. All results will be uploaded to a Zotero library where duplicate results will be removed. We will then identify if a paper is in the LitCovid collection; those that are not will be moved to screening. For those that are, we will assess whether the paper was screened in the first round; those that were not will be screened in this round. Lastly, for papers identified as having links to GenBank records through NCBI’s eLink application programming interface, we will identify if any of the resulting papers had been screened in the previous 2 rounds; those that have not will then be screened.</p>
          <fig id="figure1" position="float">
            <label>Figure 1</label>
            <caption>
              <p>Flow diagram of proposed screening of identified papers. We will first screen papers from our classifier, then we will screen those identified from database searches to ensure there are no duplicate records screened.</p>
            </caption>
            <graphic xlink:href="resprot_v14i1e58567_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
          <p>Two independent reviewers will also conduct a full-text review in Covidence. To ensure interrater reliability, a subset of 10% of the screened studies will be independently reviewed by both reviewers. We will assess the level of agreement between reviewers using the Cohen κ coefficient [<xref ref-type="bibr" rid="ref29">29</xref>]. Any discrepancies will be resolved through discussion. We will report the excluded studies with the reason for exclusion.</p>
        </sec>
      </sec>
      <sec>
        <title>Data Extraction</title>
        <p>Data extraction will be conducted in Covidence. The reviewers will examine the full text of the papers, including any supplementary files, for data extraction. The customizable interface will be designed to prompt the reviewer to extract various details, such as general publication information, study characteristics, sequencing specifics, and the presence or absence of the patient’s demographic, clinical, or geographic information about where the patient resides or had traveled before sample collection, or the location of where the sample was collected. For studies with reported patient metadata, we will note whether information is reported per individual or in aggregate. For missing or incomplete metadata, we will categorize the absence using the following classifications: explicitly withheld for privacy, deidentified before sequencing, partially reported, or not reported. Furthermore, the section where the reported patient metadata within the papers was reported will be noted, for example, text, table, or supplemental materials. An example of the data extraction form can be found in <xref ref-type="table" rid="table2">Table 2</xref>. As this scoping review aims to report on the current state of published reports of patient-related metadata, we will not contact authors for any missing or additional data not found in the paper.</p>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Example of data that will be extracted from included studies.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="570"/>
            <col width="0"/>
            <col width="400"/>
            <thead>
              <tr valign="top">
                <td colspan="3">Prompt</td>
                <td>Response</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="4">
                  <bold>Publication information</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Study name</td>
                <td colspan="2">Free text</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Paper title</td>
                <td colspan="2">Free text</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Year of publication</td>
                <td colspan="2">YYYY</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Publication type</td>
                <td colspan="2">Journal, conference, or preprint</td>
              </tr>
              <tr valign="top">
                <td colspan="4">
                  <bold>Study and sequence information</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Study objective</td>
                <td colspan="2">Free text</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Location of study (country)</td>
                <td colspan="2">Free text</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Number of patients</td>
                <td colspan="2">Free text</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Number of samples sequenced</td>
                <td colspan="2">Free text</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Short description of how the generated sequences were used in the paper</td>
                <td colspan="2">Free text</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Repository sequences deposited to</td>
                <td colspan="2">GISAID<sup>a</sup>, GenBank, other, or NR<sup>b</sup></td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>For studies with &#62;1 patient, are sequences linked to a patient?</td>
                <td colspan="2">Yes or no</td>
              </tr>
              <tr valign="top">
                <td colspan="4">
                  <bold>Patient demographic information reported</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Age</td>
                <td colspan="2">Yes or no</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Gender</td>
                <td colspan="2">Yes or no</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Race or ethnicity</td>
                <td colspan="2">Yes or no</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>If yes to any of the above, where in the paper was the information located</td>
                <td colspan="2">Text, table, or supplemental</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Reporting level</td>
                <td colspan="2">Individual or aggregate</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>If not reported, the reason</td>
                <td colspan="2">Privacy, deidentified, NR, or partial</td>
              </tr>
              <tr valign="top">
                <td colspan="4">
                  <bold>Patient clinical information reported</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Symptoms</td>
                <td colspan="2">Yes or no</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Severity</td>
                <td colspan="2">Yes or no</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Inpatient or outpatient</td>
                <td colspan="2">Yes or no</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Treatments</td>
                <td colspan="2">Yes or no</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Outcomes</td>
                <td colspan="2">Yes or no</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>If yes to any of the above, where in the paper was the information located</td>
                <td colspan="2">Text, table, or supplemental</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Reporting level</td>
                <td colspan="2">Individual or aggregate</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>If not reported, the reason</td>
                <td colspan="2">Privacy, deidentified, NR, or partial</td>
              </tr>
              <tr valign="top">
                <td colspan="4">
                  <bold>Patient geographic information reported</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Location of residence</td>
                <td colspan="2">Yes or no</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Travel information</td>
                <td colspan="2">Yes or no</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>If yes to any of the above, where in the paper was the information located</td>
                <td colspan="2">Text, table, or supplemental</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Reporting level</td>
                <td colspan="2">Individual or aggregate</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>If not reported, the reason</td>
                <td colspan="2">Privacy, deidentified, NR, or partial</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table2fn1">
              <p><sup>a</sup>GISAID: Global Initiative on Sharing All Influenza Data.</p>
            </fn>
            <fn id="table2fn2">
              <p><sup>b</sup>Not reported.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <p>We will test the initial extraction form on a subset of papers and revise it as needed.</p>
        <p>For bibliometric analysis, all pertinent data points will be extracted for studies included in our review, including author location and institution information, journal, study type, citation metrics, and author keywords or Medical Subject Headings terms when available.</p>
      </sec>
      <sec>
        <title>Data Analysis</title>
        <p>The extracted data will be synthesized and summarized to quantify the availability of patient metadata in the published literature of SARS-CoV-2 sequencing studies using an exported spreadsheet from Covidence. We will summarize and narratively describe our findings, using tables, graphs, and charts when applicable, regarding the number of sequences covered in our included studies, the distribution of the sequences in the respective repositories, and the quantity and type of reported patient metadata in the papers.</p>
        <p>For the bibliometric analysis, data will be analyzed and visualized using the VOSviewer software or the <italic>bibliometrix</italic> [<xref ref-type="bibr" rid="ref32">32</xref>] package for R (R Foundation). These will include publication metrics (eg, annual trends, and distribution by journal and country), author metrics (eg, collaboration networks or productivity), and citation analysis (eg, total and average citations, or highly cited papers). We will present the geographical location of the paper’s authors using maps to show the geographic distribution of research output and report our findings, including the most frequent journals and paper types using narrative descriptions or tables. We will use the data extracted from our review to analyze differences between studies that reported patient metadata and those that did not. Co-occurrence networks of author keywords will be presented to highlight the frequency and differences in themes and study type (eg, clinical study, case report, and surveillance study) between these reporting groups. We will analyze coauthorship networks and institutional collaborations to assess if highly collaborative studies are associated with more comprehensive metadata reporting. We will also analyze associations between study location, the potential impact of journal-related policies or characteristics, and the extent of metadata reporting. Specifically, we will examine the proportion of studies reporting different types of metadata (demographic, clinical, and geographic), trends in metadata reporting over time, and potential correlations between metadata reporting and other bibliometric indicators such as citations or journal impact factors. In addition to VOSviewer and <italic>bibliometrix</italic>, we will use the R statistical software to develop scripts for specific analyses related to metadata reporting trends.</p>
        <p>As this is a scoping review (and not a systematic review), accepted practice [<xref ref-type="bibr" rid="ref33">33</xref>] indicates that it need not include an assessment of the methodological quality (risk of bias assessment) of the papers or conduct any evidence synthesis.</p>
      </sec>
      <sec>
        <title>Ethical Considerations</title>
        <p>This scoping review will consist of collecting and reviewing publicly available data from previously published studies and does not require any ethical approval. Furthermore, quantitative results will be reported in aggregate across the included studies. The results and findings of the completed scoping review will be disseminated through the submission of a paper for peer-reviewed publication and through scientific conferences. This paper will reference this protocol, and any changes or deviations made from this protocol will be acknowledged and justified.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <p>This protocol has been registered at the Open Science Framework registries. This study is expected to be completed in early 2025. Our classification model has been developed and we have classified publications in LitCovid published through February 2023. As of September 2024, papers through August 2024 are being prepared for processing. Screening is underway for validated papers from the classifier. Direct literature searches and screening of the results began in November 2024. We will quantitatively summarize and narratively describe our findings, using tables, graphs, and charts when applicable.</p>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>The anticipated findings of this scoping review will provide a comprehensive overview of the current state of patient-related metadata reporting in SARS-CoV-2 sequencing studies. We expect to identify gaps in reporting practices, variations across different types of studies or geographic regions, and potential areas for improvement in metadata reporting standardization. In addition to the findings of our scoping review, the bibliometric analysis will likely identify several other important trends and patterns in the reporting of patient-related metadata. For example, the analysis may find that the reporting of patient-related metadata is more common in certain types of studies, or that it is more likely to be reported in studies from certain geographic regions. The findings of the scoping review and bibliometric analysis will provide valuable insights into the factors that influence the reporting of patient-related metadata and will help to inform future research on this topic.</p>
        <p>The COVID-19 pandemic has spurred an unprecedented volume of research, including extensive efforts in genomic sequencing of SARS-CoV-2. However, the utility of these sequences for genomic epidemiology may not be fully realized due to the unavailability of relevant metadata about the patient from whom the specimen was obtained [<xref ref-type="bibr" rid="ref34">34</xref>]. Shortcomings of this metadata that may accompany these sequences in the data repositories have been extensively noted [<xref ref-type="bibr" rid="ref10">10</xref>-<xref ref-type="bibr" rid="ref12">12</xref>]. Methods exist that facilitate the extraction of this data from other resources, such as published literature [<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref35">35</xref>]. The identification and quantification of the metadata in literature may aid in advancing future research.</p>
      </sec>
      <sec>
        <title>Future Directions</title>
        <p>Our study may lay the groundwork for determining the feasibility of the development of automated methods to extract patient-related metadata from publications to enrich sequences. These enriched sequences can be made available through a publicly shared repository. The availability of such a comprehensive resource could facilitate studies that compare how the inclusion of additional metadata impacts the conclusions and utility of genomic epidemiology studies. This could help quantify the importance of comprehensive metadata reporting, and potentially provide the impetus for researchers to improve their reporting practice.</p>
        <p>Beyond the practices of researchers, there may be other factors that determine whether the patient metadata is published, such as journal data-sharing policies. Based on the findings of this scoping review, researchers could develop and propose standardized guidelines for reporting patient-related metadata in SARS-CoV-2 sequencing studies. These guidelines could help improve the consistency and completeness of metadata reporting across future studies, enhancing the value of genomic sequences for epidemiological research.</p>
        <p>Moreover, our study may reveal insights into the role privacy concerns play in the reporting of relevant patient metadata. This insight could guide targeted interventions to improve reporting practices while also addressing critical patient privacy concerns. Future work could explore the development of privacy-preserving methods for sharing more comprehensive metadata.</p>
        <p>By providing a comprehensive overview of current metadata reporting practices, the results of this scoping review may support efforts to enhance both the completeness and ethical handling of patient-related metadata in genomic epidemiology research. These improvements could significantly advance our understanding of SARS-CoV-2 transmission dynamics and inform strategies for managing this and future pandemics.</p>
      </sec>
      <sec>
        <title>Strengths and Limitations</title>
        <p>We propose a novel approach to identify relevant papers with the development of an automated classifier that will locate within the text of the paper sentences that indicate viral genome sequencing was performed in the paper. This method necessitates openly available, machine-readable texts, which could bias our sample from this search to information in open-access papers. This bias should be limited in this study, however, as there was a commitment from publishers early in the COVID-19 pandemic to make content related to the pandemic open and available [<xref ref-type="bibr" rid="ref36">36</xref>]. Furthermore, we will also conduct an independent search from databases outside of LitCovid to identify any potentially missed papers from our classifier or gaps in the LitCovid collection, ensuring a more comprehensive and relevant collection of papers to include in our review. Still, there remains the possibility that some relevant studies may be missed due to search limitations, which may lead to an under- or overestimation of the extent of metadata reporting. While we aim to follow the best practices in methodology and reporting by adhering to the PRISMA-ScR checklist, we do deviate from standard practice for identifying studies through the use of a classifier. This approach will allow us to identify sequencing studies that may not be apparent from traditional title or abstract screening alone. Other limitations exist, such as potential limitations in reported patient metadata [<xref ref-type="bibr" rid="ref37">37</xref>,<xref ref-type="bibr" rid="ref38">38</xref>] and the focus on SARS-CoV-2 sequencing studies, which may limit the applicability of our findings to other pathogens or pandemics. There may also be a gap in publication time between the depositing of sequences and the publication of the paper. Furthermore, reporting patterns may differ from those early in the pandemic due to the urgent need to disseminate information; reporting practices and requirements in publications may have changed over the course of the pandemic; and research priorities may have changed as the pandemic continued. Any of these scenarios may affect the ability to draw definitive conclusions about trends in metadata reporting over time.</p>
      </sec>
      <sec>
        <title>Conclusion</title>
        <p>This protocol outlines the steps that we will take in our scoping review, which will be supported by an automated classifier and bibliometric analysis. We will fill the knowledge gap regarding the extent and types of patient-related metadata reported in genomic viral sequencing studies of SARS-CoV-2 and will provide valuable insights by identifying themes and trends in the published literature. The results of this study may encourage improved and standardized reporting practices, which will significantly enhance the utility of sequence metadata and aid in advancing our understanding of the SARS-CoV-2 pandemic or any future pandemic. Future research can build upon our study to address these gaps and enhance reporting practices in this field.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>PRISMA-ScR (Preferred Reporting Items for Systematic Reviews and Meta-Analyses extension for Scoping Reviews) checklist.</p>
        <media xlink:href="resprot_v14i1e58567_app1.docx" xlink:title="DOCX File , 84 KB"/>
      </supplementary-material>
      <supplementary-material id="app2">
        <label>Multimedia Appendix 2</label>
        <p>Sample search strategy for Ovid MEDLINE.</p>
        <media xlink:href="resprot_v14i1e58567_app2.docx" xlink:title="DOCX File , 17 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">GISAID</term>
          <def>
            <p>Global Initiative on Sharing All Influenza Data</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">NCBI</term>
          <def>
            <p>National Center for Biotechnology Information</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">PRISMA-ScR</term>
          <def>
            <p>Preferred Reporting Items for Systematic Reviews and Meta-Analyses Extension for Scoping Reviews</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>Research reported in this publication was supported by the National Institute of Allergy and Infectious Diseases of the National Institutes of Health (award R01AI164481 to GG-H and MS). The National Institutes of Health’s National Institute of Allergy and Infectious Diseases funded this research but was not involved in the conceptualization, design, data collection, analysis, decision to publish, or preparation of this paper. The views expressed in this paper are those of the authors and not those of the National Institutes of Health.</p>
    </ack>
    <notes>
      <sec>
        <title>Data Availability</title>
        <p>This study will analyze and synthesize previously published information. Data sharing does not apply to this paper as no datasets were generated or analyzed during this study. We will submit for publication the completed scoping review and bibliometric analysis. At that time, any extracted data and data generated in our analysis will be made available with the publication.</p>
      </sec>
    </notes>
    <fn-group>
      <fn fn-type="con">
        <p>KO, EL, MS, and GG-H designed this study. KO was a major contributor to the writing of this paper. DW designed the classification methods. KO and AE designed the annotation methods. All authors read, edited, and approved the final paper.</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Shu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>McCauley</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>GISAID: Global initiative on sharing all influenza data - from vision to reality</article-title>
          <source>Euro Surveill</source>
          <year>2017</year>
          <month>03</month>
          <day>30</day>
          <volume>22</volume>
          <issue>13</issue>
          <fpage>30494</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/28382917"/>
          </comment>
          <pub-id pub-id-type="doi">10.2807/1560-7917.ES.2017.22.13.30494</pub-id>
          <pub-id pub-id-type="medline">28382917</pub-id>
          <pub-id pub-id-type="pii">30494</pub-id>
          <pub-id pub-id-type="pmcid">PMC5388101</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sayers</surname>
              <given-names>EW</given-names>
            </name>
            <name name-style="western">
              <surname>Cavanaugh</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Clark</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Ostell</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Pruitt</surname>
              <given-names>KD</given-names>
            </name>
            <name name-style="western">
              <surname>Karsch-Mizrachi</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>GenBank</article-title>
          <source>Nucleic Acids Res</source>
          <year>2019</year>
          <volume>47</volume>
          <issue>D1</issue>
          <fpage>D94</fpage>
          <lpage>D99</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/30365038"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/nar/gky989</pub-id>
          <pub-id pub-id-type="medline">30365038</pub-id>
          <pub-id pub-id-type="pii">5144964</pub-id>
          <pub-id pub-id-type="pmcid">PMC6323954</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Forster</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Forster</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Renfrew</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Forster</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Phylogenetic network analysis of SARS-CoV-2 genomes</article-title>
          <source>Proc Natl Acad Sci U S A</source>
          <year>2020</year>
          <volume>117</volume>
          <issue>17</issue>
          <fpage>9241</fpage>
          <lpage>9243</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.pnas.org/doi/abs/10.1073/pnas.2004999117?url_ver=Z39.88-2003&#38;rfr_id=ori:rid:crossref.org&#38;rfr_dat=cr_pub%20%200pubmed"/>
          </comment>
          <pub-id pub-id-type="doi">10.1073/pnas.2004999117</pub-id>
          <pub-id pub-id-type="medline">32269081</pub-id>
          <pub-id pub-id-type="pii">2004999117</pub-id>
          <pub-id pub-id-type="pmcid">PMC7196762</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>van Dorp</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Acman</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Richard</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Shaw</surname>
              <given-names>LP</given-names>
            </name>
            <name name-style="western">
              <surname>Ford</surname>
              <given-names>CE</given-names>
            </name>
            <name name-style="western">
              <surname>Ormond</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Owen</surname>
              <given-names>CJ</given-names>
            </name>
            <name name-style="western">
              <surname>Pang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Tan</surname>
              <given-names>CC</given-names>
            </name>
            <name name-style="western">
              <surname>Boshier</surname>
              <given-names>FA</given-names>
            </name>
            <name name-style="western">
              <surname>Ortiz</surname>
              <given-names>AT</given-names>
            </name>
            <name name-style="western">
              <surname>Balloux</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Emergence of genomic diversity and recurrent mutations in SARS-CoV-2</article-title>
          <source>Infect Genet Evol</source>
          <year>2020</year>
          <volume>83</volume>
          <fpage>104351</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/32387564"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.meegid.2020.104351</pub-id>
          <pub-id pub-id-type="medline">32387564</pub-id>
          <pub-id pub-id-type="pii">S1567-1348(20)30182-9</pub-id>
          <pub-id pub-id-type="pmcid">PMC7199730</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Song</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Yao</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Duan</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Qian</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Cui</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>On the origin and continuing evolution of SARS-CoV-2</article-title>
          <source>Natl Sci Rev</source>
          <year>2020</year>
          <volume>7</volume>
          <issue>6</issue>
          <fpage>1012</fpage>
          <lpage>1023</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/34676127"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/nsr/nwaa036</pub-id>
          <pub-id pub-id-type="medline">34676127</pub-id>
          <pub-id pub-id-type="pii">nwaa036</pub-id>
          <pub-id pub-id-type="pmcid">PMC7107875</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hill</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Ruis</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Bajaj</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Pybus</surname>
              <given-names>OG</given-names>
            </name>
            <name name-style="western">
              <surname>Kraemer</surname>
              <given-names>MU</given-names>
            </name>
          </person-group>
          <article-title>Progress and challenges in virus genomic epidemiology</article-title>
          <source>Trends Parasitol</source>
          <year>2021</year>
          <volume>37</volume>
          <issue>12</issue>
          <fpage>1038</fpage>
          <lpage>1049</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1471-4922(21)00205-1"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.pt.2021.08.007</pub-id>
          <pub-id pub-id-type="medline">34620561</pub-id>
          <pub-id pub-id-type="pii">S1471-4922(21)00205-1</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tang</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Croxen</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Hasan</surname>
              <given-names>MR</given-names>
            </name>
            <name name-style="western">
              <surname>Hsiao</surname>
              <given-names>WW</given-names>
            </name>
            <name name-style="western">
              <surname>Hoang</surname>
              <given-names>LM</given-names>
            </name>
          </person-group>
          <article-title>Infection control in the new age of genomic epidemiology</article-title>
          <source>Am J Infect Control</source>
          <year>2017</year>
          <volume>45</volume>
          <issue>2</issue>
          <fpage>170</fpage>
          <lpage>179</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S0196-6553(16)30580-6"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.ajic.2016.05.015</pub-id>
          <pub-id pub-id-type="medline">28159067</pub-id>
          <pub-id pub-id-type="pii">S0196-6553(16)30580-6</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="web">
          <article-title>Genomic epidemiology data infrastructure needs for SARS-CoV-2: modernizing pandemic response strategies</article-title>
          <source>National Academies of Sciences, Engineering, and Medicine</source>
          <year>2020</year>
          <access-date>2025-02-06</access-date>
          <publisher-loc>Washington, DC</publisher-loc>
          <publisher-name>The National Academies Press</publisher-name>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.17226/25879">https://doi.org/10.17226/25879</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="web">
          <article-title>Genomic sequencing of SARS-CoV-2: a guide to implementation for maximum impact on public health</article-title>
          <source>World Health Organization</source>
          <year>2021</year>
          <access-date>2025-02-06</access-date>
          <publisher-loc>Geneva</publisher-loc>
          <publisher-name>World Health Organization</publisher-name>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://iris.who.int/bitstream/handle/10665/338480/9789240018440-eng.pdf">https://iris.who.int/bitstream/handle/10665/338480/9789240018440-eng.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gozashti</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Corbett-Detig</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Shortcomings of SARS-CoV-2 genomic metadata</article-title>
          <source>BMC Res Notes</source>
          <year>2021</year>
          <volume>14</volume>
          <issue>1</issue>
          <fpage>189</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcresnotes.biomedcentral.com/articles/10.1186/s13104-021-05605-9"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s13104-021-05605-9</pub-id>
          <pub-id pub-id-type="medline">34001211</pub-id>
          <pub-id pub-id-type="pii">10.1186/s13104-021-05605-9</pub-id>
          <pub-id pub-id-type="pmcid">PMC8128092</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Schriml</surname>
              <given-names>LM</given-names>
            </name>
            <name name-style="western">
              <surname>Chuvochina</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Davies</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Eloe-Fadrosh</surname>
              <given-names>EA</given-names>
            </name>
            <name name-style="western">
              <surname>Finn</surname>
              <given-names>RD</given-names>
            </name>
            <name name-style="western">
              <surname>Hugenholtz</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Hunter</surname>
              <given-names>CI</given-names>
            </name>
            <name name-style="western">
              <surname>Hurwitz</surname>
              <given-names>BL</given-names>
            </name>
            <name name-style="western">
              <surname>Kyrpides</surname>
              <given-names>NC</given-names>
            </name>
            <name name-style="western">
              <surname>Meyer</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Mizrachi</surname>
              <given-names>IK</given-names>
            </name>
            <name name-style="western">
              <surname>Sansone</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Sutton</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Tighe</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Walls</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>COVID-19 pandemic reveals the peril of ignoring metadata standards</article-title>
          <source>Sci Data</source>
          <year>2020</year>
          <volume>7</volume>
          <issue>1</issue>
          <fpage>188</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41597-020-0524-5"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41597-020-0524-5</pub-id>
          <pub-id pub-id-type="medline">32561801</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41597-020-0524-5</pub-id>
          <pub-id pub-id-type="pmcid">PMC7305141</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Griffiths</surname>
              <given-names>EJ</given-names>
            </name>
            <name name-style="western">
              <surname>Timme</surname>
              <given-names>RE</given-names>
            </name>
            <name name-style="western">
              <surname>Mendes</surname>
              <given-names>CI</given-names>
            </name>
            <name name-style="western">
              <surname>Page</surname>
              <given-names>AJ</given-names>
            </name>
            <name name-style="western">
              <surname>Alikhan</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Fornika</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Maguire</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Campos</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Park</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Olawoye</surname>
              <given-names>IB</given-names>
            </name>
            <name name-style="western">
              <surname>Oluniyi</surname>
              <given-names>PE</given-names>
            </name>
            <name name-style="western">
              <surname>Anderson</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Christoffels</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>da Silva</surname>
              <given-names>AG</given-names>
            </name>
            <name name-style="western">
              <surname>Cameron</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Dooley</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Katz</surname>
              <given-names>LS</given-names>
            </name>
            <name name-style="western">
              <surname>Black</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Karsch-Mizrachi</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Barrett</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Johnston</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Connor</surname>
              <given-names>TR</given-names>
            </name>
            <name name-style="western">
              <surname>Nicholls</surname>
              <given-names>SM</given-names>
            </name>
            <name name-style="western">
              <surname>Witney</surname>
              <given-names>AA</given-names>
            </name>
            <name name-style="western">
              <surname>Tyson</surname>
              <given-names>GH</given-names>
            </name>
            <name name-style="western">
              <surname>Tausch</surname>
              <given-names>SH</given-names>
            </name>
            <name name-style="western">
              <surname>Raphenya</surname>
              <given-names>AR</given-names>
            </name>
            <name name-style="western">
              <surname>Alcock</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Aanensen</surname>
              <given-names>DM</given-names>
            </name>
            <name name-style="western">
              <surname>Hodcroft</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Hsiao</surname>
              <given-names>WWL</given-names>
            </name>
            <name name-style="western">
              <surname>Vasconcelos</surname>
              <given-names>ATR</given-names>
            </name>
            <name name-style="western">
              <surname>MacCannell</surname>
              <given-names>DR</given-names>
            </name>
          </person-group>
          <article-title>Future-proofing and maximizing the utility of metadata: the PHA4GE SARS-CoV-2 contextual data specification package</article-title>
          <source>Gigascience</source>
          <year>2022</year>
          <volume>11</volume>
          <fpage>giac003</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/35169842"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/gigascience/giac003</pub-id>
          <pub-id pub-id-type="medline">35169842</pub-id>
          <pub-id pub-id-type="pii">6529104</pub-id>
          <pub-id pub-id-type="pmcid">PMC8847733</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Grubaugh</surname>
              <given-names>ND</given-names>
            </name>
            <name name-style="western">
              <surname>Ladner</surname>
              <given-names>JT</given-names>
            </name>
            <name name-style="western">
              <surname>Lemey</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Pybus</surname>
              <given-names>OG</given-names>
            </name>
            <name name-style="western">
              <surname>Rambaut</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Holmes</surname>
              <given-names>EC</given-names>
            </name>
            <name name-style="western">
              <surname>Andersen</surname>
              <given-names>KG</given-names>
            </name>
          </person-group>
          <article-title>Tracking virus outbreaks in the twenty-first century</article-title>
          <source>Nat Microbiol</source>
          <year>2019</year>
          <volume>4</volume>
          <issue>1</issue>
          <fpage>10</fpage>
          <lpage>19</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/30546099"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41564-018-0296-2</pub-id>
          <pub-id pub-id-type="medline">30546099</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41564-018-0296-2</pub-id>
          <pub-id pub-id-type="pmcid">PMC6345516</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Magge</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Weissenbacher</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>O'Connor</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Tahsin</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Gonzalez-Hernandez</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Scotch</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>GeoBoost2: a natural language processing pipeline for GenBank metadata enrichment for virus phylogeography</article-title>
          <source>Bioinformatics</source>
          <year>2020</year>
          <volume>36</volume>
          <issue>20</issue>
          <fpage>5120</fpage>
          <lpage>5121</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/32683454"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/bioinformatics/btaa647</pub-id>
          <pub-id pub-id-type="medline">32683454</pub-id>
          <pub-id pub-id-type="pii">5873583</pub-id>
          <pub-id pub-id-type="pmcid">PMC7755405</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tahsin</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Weissenbacher</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>O'Connor</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Magge</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Scotch</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Gonzalez-Hernandez</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>GeoBoost: accelerating research involving the geospatial metadata of virus GenBank records</article-title>
          <source>Bioinformatics</source>
          <year>2018</year>
          <volume>34</volume>
          <issue>9</issue>
          <fpage>1606</fpage>
          <lpage>1608</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/29240889"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/bioinformatics/btx799</pub-id>
          <pub-id pub-id-type="medline">29240889</pub-id>
          <pub-id pub-id-type="pii">4731736</pub-id>
          <pub-id pub-id-type="pmcid">PMC5925778</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Allot</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>Z</given-names>
            </name>
          </person-group>
          <article-title>LitCovid: an open database of COVID-19 literature</article-title>
          <source>Nucleic Acids Res</source>
          <year>2021</year>
          <volume>49</volume>
          <issue>D1</issue>
          <fpage>D1534</fpage>
          <lpage>D1540</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/33166392"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/nar/gkaa952</pub-id>
          <pub-id pub-id-type="medline">33166392</pub-id>
          <pub-id pub-id-type="pii">5964074</pub-id>
          <pub-id pub-id-type="pmcid">PMC7778958</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gutiérrez-Salcedo</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Martínez</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Moral-Munoz</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Herrera-Viedma</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Cobo</surname>
              <given-names>MJ</given-names>
            </name>
          </person-group>
          <article-title>Some bibliometric procedures for analyzing and evaluating research fields</article-title>
          <source>Appl Intell</source>
          <year>2018</year>
          <volume>48</volume>
          <fpage>1275</fpage>
          <lpage>1287</lpage>
          <pub-id pub-id-type="doi">10.1007/s10489-017-1105-y</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hossain</surname>
              <given-names>MM</given-names>
            </name>
          </person-group>
          <article-title>Current status of global research on novel coronavirus disease (COVID-19): a bibliometric analysis and knowledge mapping</article-title>
          <source>SSRN Journal</source>
          <year>2020</year>
          <volume>9</volume>
          <issue>374</issue>
          <fpage>1</fpage>
          <lpage>12</lpage>
          <pub-id pub-id-type="doi">10.2139/ssrn.3547824</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nasab</surname>
              <given-names>FR</given-names>
            </name>
            <name name-style="western">
              <surname>Rahim</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Bibliometric analysis of global scientific research on SARS-CoV-2 (COVID-19)</article-title>
          <source>Cell J</source>
          <year>2021</year>
          <volume>23</volume>
          <issue>5</issue>
          <fpage>523</fpage>
          <lpage>531</lpage>
          <pub-id pub-id-type="doi">10.1101/2020.03.19.20038752</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Gu</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Zhong</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Zha</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>A bibliometric analysis using VOSviewer of publications on COVID-19</article-title>
          <source>Ann Transl Med</source>
          <year>2020</year>
          <volume>8</volume>
          <issue>13</issue>
          <fpage>816</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/32793661"/>
          </comment>
          <pub-id pub-id-type="doi">10.21037/atm-20-4235</pub-id>
          <pub-id pub-id-type="medline">32793661</pub-id>
          <pub-id pub-id-type="pii">atm-08-13-816</pub-id>
          <pub-id pub-id-type="pmcid">PMC7396244</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Weng</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>A bibliometric analysis of COVID-19 publications in neurology by using the visual mapping method</article-title>
          <source>Front Public Health</source>
          <year>2022</year>
          <volume>10</volume>
          <fpage>937008</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.frontiersin.org/articles/10.3389/fpubh.2022.937008"/>
          </comment>
          <pub-id pub-id-type="doi">10.3389/fpubh.2022.937008</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>TH</given-names>
            </name>
            <name name-style="western">
              <surname>Jeon</surname>
              <given-names>SR</given-names>
            </name>
            <name name-style="western">
              <surname>Kang</surname>
              <given-names>JW</given-names>
            </name>
            <name name-style="western">
              <surname>Kwon</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Complementary and alternative medicine for long COVID: scoping review and bibliometric analysis</article-title>
          <source>Evid Based Complement Alternat Med</source>
          <year>2022</year>
          <volume>2022</volume>
          <fpage>7303393</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1155/2022/7303393"/>
          </comment>
          <pub-id pub-id-type="doi">10.1155/2022/7303393</pub-id>
          <pub-id pub-id-type="medline">35966751</pub-id>
          <pub-id pub-id-type="pmcid">PMC9371860</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wen</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Gao</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Zeng</surname>
              <given-names>X</given-names>
            </name>
          </person-group>
          <article-title>COVID-19 imaging, where do we go from here? Bibliometric analysis of medical imaging in COVID-19</article-title>
          <source>Eur Radiol</source>
          <year>2023</year>
          <volume>33</volume>
          <issue>5</issue>
          <fpage>3133</fpage>
          <lpage>3143</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/36892649"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s00330-023-09498-z</pub-id>
          <pub-id pub-id-type="medline">36892649</pub-id>
          <pub-id pub-id-type="pii">10.1007/s00330-023-09498-z</pub-id>
          <pub-id pub-id-type="pmcid">PMC9996554</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Guleid</surname>
              <given-names>FH</given-names>
            </name>
            <name name-style="western">
              <surname>Oyando</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Kabia</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Mumbi</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Akech</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Barasa</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>A bibliometric analysis of COVID-19 research in Africa</article-title>
          <source>BMJ Glob Health</source>
          <year>2021</year>
          <volume>6</volume>
          <issue>5</issue>
          <fpage>e005690</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://gh.bmj.com/lookup/pmidlookup?view=long&#38;pmid=33972261"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/bmjgh-2021-005690</pub-id>
          <pub-id pub-id-type="medline">33972261</pub-id>
          <pub-id pub-id-type="pii">bmjgh-2021-005690</pub-id>
          <pub-id pub-id-type="pmcid">PMC8111873</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Arksey</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>O'Malley</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Scoping studies: towards a methodological framework</article-title>
          <source>Int J Soc Res Methodol</source>
          <year>2005</year>
          <volume>8</volume>
          <issue>1</issue>
          <fpage>19</fpage>
          <lpage>32</lpage>
          <pub-id pub-id-type="doi">10.1080/1364557032000119616</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tricco</surname>
              <given-names>AC</given-names>
            </name>
            <name name-style="western">
              <surname>Lillie</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Zarin</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>O'Brien</surname>
              <given-names>KK</given-names>
            </name>
            <name name-style="western">
              <surname>Colquhoun</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Levac</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Moher</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Peters</surname>
              <given-names>MD</given-names>
            </name>
            <name name-style="western">
              <surname>Horsley</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Weeks</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Hempel</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Akl</surname>
              <given-names>EA</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>McGowan</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Stewart</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Hartling</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Aldcroft</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Wilson</surname>
              <given-names>MG</given-names>
            </name>
            <name name-style="western">
              <surname>Garritty</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Lewin</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Godfrey</surname>
              <given-names>CM</given-names>
            </name>
            <name name-style="western">
              <surname>Macdonald</surname>
              <given-names>MT</given-names>
            </name>
            <name name-style="western">
              <surname>Langlois</surname>
              <given-names>EV</given-names>
            </name>
            <name name-style="western">
              <surname>Soares-Weiser</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Moriarty</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Clifford</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Tunçalp</surname>
              <given-names>Ö</given-names>
            </name>
            <name name-style="western">
              <surname>Straus</surname>
              <given-names>SE</given-names>
            </name>
          </person-group>
          <article-title>PRISMA extension for scoping reviews (PRISMA-ScR): checklist and explanation</article-title>
          <source>Ann Intern Med</source>
          <year>2018</year>
          <volume>169</volume>
          <issue>7</issue>
          <fpage>467</fpage>
          <lpage>473</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.acpjournals.org/doi/abs/10.7326/M18-0850?url_ver=Z39.88-2003&#38;rfr_id=ori:rid:crossref.org&#38;rfr_dat=cr_pub%20%200pubmed"/>
          </comment>
          <pub-id pub-id-type="doi">10.7326/M18-0850</pub-id>
          <pub-id pub-id-type="medline">30178033</pub-id>
          <pub-id pub-id-type="pii">2700389</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Weissenbacher</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>O'Connor</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Klein</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Golder</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Flores</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Elyaderani</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Scotch</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Gonzalez-Hernandez</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Text mining biomedical literature to identify extremely unbalanced data for digital epidemiology and systematic reviews: dataset and methods for a SARS-CoV-2 genomic epidemiology study</article-title>
          <source>medRxiv. Preprint posted online on August 04, 2023</source>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/37577535"/>
          </comment>
          <pub-id pub-id-type="doi">10.1101/2023.07.29.23293370</pub-id>
          <pub-id pub-id-type="medline">37577535</pub-id>
          <pub-id pub-id-type="pii">2023.07.29.23293370</pub-id>
          <pub-id pub-id-type="pmcid">PMC10418574</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Klie</surname>
              <given-names>JC</given-names>
            </name>
            <name name-style="western">
              <surname>Bugert</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Boullosa</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Eckart de Castilho</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Gurevych</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>The INCEpTION platform: machine-assisted and knowledge-oriented interactive annotation</article-title>
          <year>2018</year>
          <conf-name>Proceedings of the 27th International Conference on Computational Linguistics: System Demonstrations</conf-name>
          <conf-date>2018 August 20-26</conf-date>
          <conf-loc>Santa Fe, New Mexico</conf-loc>
          <publisher-name>Association for Computational Linguistics</publisher-name>
          <fpage>5</fpage>
          <lpage>9</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/C18-2002/"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>McHugh</surname>
              <given-names>ML</given-names>
            </name>
          </person-group>
          <article-title>Interrater reliability: the kappa statistic</article-title>
          <source>Biochem Med (Zagreb)</source>
          <year>2012</year>
          <volume>22</volume>
          <issue>3</issue>
          <fpage>276</fpage>
          <lpage>282</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/23092060"/>
          </comment>
          <pub-id pub-id-type="medline">23092060</pub-id>
          <pub-id pub-id-type="pmcid">PMC3900052</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Devlin</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>MW</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Toutanova</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>BERT: pre-training of deep bidirectional transformers for language understanding</article-title>
          <year>2019</year>
          <conf-name>Proceedings of NAACL-HLT</conf-name>
          <conf-date>2019 June 2-7</conf-date>
          <conf-loc>Minneapolis, Minnesota</conf-loc>
          <publisher-name>Association for Computational Linguistics</publisher-name>
          <fpage>4171</fpage>
          <lpage>4186</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/N19-1423/"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Greenhalgh</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Peacock</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Effectiveness and efficiency of search methods in systematic reviews of complex evidence: audit of primary sources</article-title>
          <source>BMJ</source>
          <year>2005</year>
          <volume>331</volume>
          <issue>7524</issue>
          <fpage>1064</fpage>
          <lpage>1065</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/16230312"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/bmj.38636.593461.68</pub-id>
          <pub-id pub-id-type="medline">16230312</pub-id>
          <pub-id pub-id-type="pii">bmj.38636.593461.68</pub-id>
          <pub-id pub-id-type="pmcid">PMC1283190</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Aria</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Cuccurullo</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>bibliometrix: an R-tool for comprehensive science mapping analysis</article-title>
          <source>J Informetr</source>
          <year>2017</year>
          <volume>11</volume>
          <issue>4</issue>
          <fpage>959</fpage>
          <lpage>975</lpage>
          <pub-id pub-id-type="doi">10.1016/j.joi.2017.08.007</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Munn</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Peters</surname>
              <given-names>MDJ</given-names>
            </name>
            <name name-style="western">
              <surname>Stern</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Tufanaru</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>McArthur</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Aromataris</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Systematic review or scoping review? Guidance for authors when choosing between a systematic or scoping review approach</article-title>
          <source>BMC Med Res Methodol</source>
          <year>2018</year>
          <volume>18</volume>
          <issue>1</issue>
          <fpage>143</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedresmethodol.biomedcentral.com/articles/10.1186/s12874-018-0611-x"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12874-018-0611-x</pub-id>
          <pub-id pub-id-type="medline">30453902</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12874-018-0611-x</pub-id>
          <pub-id pub-id-type="pmcid">PMC6245623</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Grad</surname>
              <given-names>YH</given-names>
            </name>
            <name name-style="western">
              <surname>Lipsitch</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Epidemiologic data and pathogen genome sequences: a powerful synergy for public health</article-title>
          <source>Genome Biol</source>
          <year>2014</year>
          <volume>15</volume>
          <issue>11</issue>
          <fpage>538</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://genomebiology.biomedcentral.com/articles/10.1186/s13059-014-0538-4"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s13059-014-0538-4</pub-id>
          <pub-id pub-id-type="medline">25418119</pub-id>
          <pub-id pub-id-type="pii">s13059-014-0538-4</pub-id>
          <pub-id pub-id-type="pmcid">PMC4282151</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Weissenbacher</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Sarker</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Tahsin</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Scotch</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Gonzalez</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Extracting geographic locations from the literature for virus phylogeography using supervised and distant supervision methods</article-title>
          <source>AMIA Jt Summits Transl Sci Proc</source>
          <year>2017</year>
          <volume>2017</volume>
          <fpage>114</fpage>
          <lpage>122</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/28815119"/>
          </comment>
          <pub-id pub-id-type="medline">28815119</pub-id>
          <pub-id pub-id-type="pmcid">PMC5543364</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="web">
          <source>Publishers make coronavirus (COVID-19) content freely available and reusable</source>
          <year>2020</year>
          <access-date>2025-02-06</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://wellcome.org/press-release/publishers-make-coronavirus-covid-19-content-freely-available-and-reusable">https://wellcome.org/press-release/publishers-make-coronavirus-covid-19-content-freely-available-and-reusable</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hernandez</surname>
              <given-names>MM</given-names>
            </name>
            <name name-style="western">
              <surname>Gonzalez-Reiche</surname>
              <given-names>AS</given-names>
            </name>
            <name name-style="western">
              <surname>Alshammary</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Fabre</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Khan</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>van de Guchte</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Obla</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Ellis</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Sullivan</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Tan</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Alburquerque</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Soto</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Sridhar</surname>
              <given-names>SH</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Smith</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Sebra</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Paniz-Mondolfi</surname>
              <given-names>AE</given-names>
            </name>
            <name name-style="western">
              <surname>Gitman</surname>
              <given-names>MR</given-names>
            </name>
            <name name-style="western">
              <surname>Nowak</surname>
              <given-names>MD</given-names>
            </name>
            <name name-style="western">
              <surname>Cordon-Cardo</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Luksza</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Krammer</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>van Bakel</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Simon</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Sordillo</surname>
              <given-names>EM</given-names>
            </name>
          </person-group>
          <article-title>Molecular evidence of SARS-CoV-2 in New York before the first pandemic wave</article-title>
          <source>Nat Commun</source>
          <year>2021</year>
          <volume>12</volume>
          <issue>1</issue>
          <fpage>3463</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41467-021-23688-7"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41467-021-23688-7</pub-id>
          <pub-id pub-id-type="medline">34103497</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41467-021-23688-7</pub-id>
          <pub-id pub-id-type="pmcid">PMC8187428</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Page</surname>
              <given-names>AJ</given-names>
            </name>
            <name name-style="western">
              <surname>Mather</surname>
              <given-names>AE</given-names>
            </name>
            <name name-style="western">
              <surname>Le-Viet</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Meader</surname>
              <given-names>EJ</given-names>
            </name>
            <name name-style="western">
              <surname>Alikhan</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Kay</surname>
              <given-names>GL</given-names>
            </name>
            <name name-style="western">
              <surname>de Oliveira Martins</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Aydin</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Baker</surname>
              <given-names>DJ</given-names>
            </name>
            <name name-style="western">
              <surname>Trotter</surname>
              <given-names>AJ</given-names>
            </name>
            <name name-style="western">
              <surname>Rudder</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Tedim</surname>
              <given-names>AP</given-names>
            </name>
            <name name-style="western">
              <surname>Kolyva</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Stanley</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Yasir</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Diaz</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Potter</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Stuart</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Meadows</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Bell</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Gutierrez</surname>
              <given-names>AV</given-names>
            </name>
            <name name-style="western">
              <surname>Thomson</surname>
              <given-names>NM</given-names>
            </name>
            <name name-style="western">
              <surname>Adriaenssens</surname>
              <given-names>EM</given-names>
            </name>
            <name name-style="western">
              <surname>Swingler</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Gilroy</surname>
              <given-names>RAJ</given-names>
            </name>
            <name name-style="western">
              <surname>Griffith</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Sethi</surname>
              <given-names>DK</given-names>
            </name>
            <name name-style="western">
              <surname>Aggarwal</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Brown</surname>
              <given-names>CS</given-names>
            </name>
            <name name-style="western">
              <surname>Davidson</surname>
              <given-names>RK</given-names>
            </name>
            <name name-style="western">
              <surname>Kingsley</surname>
              <given-names>RA</given-names>
            </name>
            <name name-style="western">
              <surname>Bedford</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Coupland</surname>
              <given-names>LJ</given-names>
            </name>
            <name name-style="western">
              <surname>Charles</surname>
              <given-names>IG</given-names>
            </name>
            <name name-style="western">
              <surname>Elumogo</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Wain</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Prakash</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Webber</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Smith</surname>
              <given-names>SJL</given-names>
            </name>
            <name name-style="western">
              <surname>Chand</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Dervisevic</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>O'Grady</surname>
              <given-names>J</given-names>
            </name>
            <collab>The COVID-19 Genomics UK (COG-UK) Consortium</collab>
          </person-group>
          <article-title>Large-scale sequencing of SARS-CoV-2 genomes from one region allows detailed epidemiology and enables local outbreak management</article-title>
          <source>Microb Genom</source>
          <year>2021</year>
          <volume>7</volume>
          <issue>6</issue>
          <fpage>000589</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://mgen.microbiologyresearch.org/pubmed/content/journal/mgen/10.1099/mgen.0.000589"/>
          </comment>
          <pub-id pub-id-type="doi">10.1099/mgen.0.000589</pub-id>
          <pub-id pub-id-type="medline">34184982</pub-id>
          <pub-id pub-id-type="pmcid">PMC8461472</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
