<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">ResProt</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Res Protoc</journal-id>
      <journal-title>JMIR Research Protocols</journal-title>
      <issn pub-type="epub">1929-0748</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v14i1e69431</article-id>
      <article-id pub-id-type="pmid">40694835</article-id>
      <article-id pub-id-type="doi">10.2196/69431</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Protocol</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Protocol</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Collection and Analysis of Repeated Speech Samples: Methodological Framework and Example Protocol</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Sarvestan</surname>
            <given-names>Javad</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Johnson</surname>
            <given-names>Brian</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Lowie</surname>
            <given-names>Wander</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author">
          <name name-style="western">
            <surname>Cummins</surname>
            <given-names>Nicholas</given-names>
          </name>
          <degrees>BEng, PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-1178-917X</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>White</surname>
            <given-names>Lauren Louise</given-names>
          </name>
          <degrees>BSc, MSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-0872-4241</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Rahman</surname>
            <given-names>Zahia</given-names>
          </name>
          <degrees>BSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0004-6327-3476</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Lucas</surname>
            <given-names>Catriona</given-names>
          </name>
          <degrees>BSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0000-6313-2789</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Pan</surname>
            <given-names>Tian</given-names>
          </name>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0006-1064-5672</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author">
          <name name-style="western">
            <surname>Carr</surname>
            <given-names>Ewan</given-names>
          </name>
          <degrees>MSc, PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-1146-4922</ext-link>
        </contrib>
        <contrib id="contrib7" contrib-type="author">
          <name name-style="western">
            <surname>Matcham</surname>
            <given-names>Faith</given-names>
          </name>
          <degrees>MSc, PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-4055-904X</ext-link>
        </contrib>
        <contrib id="contrib8" contrib-type="author">
          <name name-style="western">
            <surname>Downs</surname>
            <given-names>Johnny</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-8061-295X</ext-link>
        </contrib>
        <contrib id="contrib9" contrib-type="author">
          <name name-style="western">
            <surname>Dobson</surname>
            <given-names>Richard</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-4224-9245</ext-link>
        </contrib>
        <contrib id="contrib10" contrib-type="author">
          <name name-style="western">
            <surname>Quatieri</surname>
            <given-names>Thomas F</given-names>
          </name>
          <degrees>BS, SM, EE, SCD</degrees>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-1925-6340</ext-link>
        </contrib>
        <contrib id="contrib11" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Dineley</surname>
            <given-names>Judith</given-names>
          </name>
          <degrees>BSc, MSc, PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Department of Biostatistics &#38; Health Informatics</institution>
            <institution>Institute of Psychiatry, Psychology and Neuroscience</institution>
            <institution>King's College London</institution>
            <addr-line>16 De Crespigny Park</addr-line>
            <addr-line>London, SE5 8AF</addr-line>
            <country>United Kingdom</country>
            <phone>44 20 7848 0002</phone>
            <email>judith.dineley@kcl.ac.uk</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-5541-6853</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Department of Biostatistics &#38; Health Informatics</institution>
        <institution>Institute of Psychiatry, Psychology and Neuroscience</institution>
        <institution>King's College London</institution>
        <addr-line>London</addr-line>
        <country>United Kingdom</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>School of Psychology</institution>
        <institution>University of Sussex</institution>
        <addr-line>Brighton</addr-line>
        <country>United Kingdom</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>CAMHS Digital Lab, King’s Maudsley Partnership, Department of Child &#38; Adolescent Psychiatry</institution>
        <institution>Institute of Psychiatry, Psychology and Neuroscience</institution>
        <institution>King's College London</institution>
        <addr-line>London</addr-line>
        <country>United Kingdom</country>
      </aff>
      <aff id="aff4">
        <label>4</label>
        <institution>Human Health and Performance Systems</institution>
        <institution>MIT Lincoln Laboratory</institution>
        <addr-line>Lexington</addr-line>
        <country>United States</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Judith Dineley <email>judith.dineley@kcl.ac.uk</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <year>2025</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>22</day>
        <month>7</month>
        <year>2025</year>
      </pub-date>
      <volume>14</volume>
      <elocation-id>e69431</elocation-id>
      <history>
        <date date-type="received">
          <day>2</day>
          <month>12</month>
          <year>2024</year>
        </date>
        <date date-type="rev-request">
          <day>31</day>
          <month>1</month>
          <year>2025</year>
        </date>
        <date date-type="rev-recd">
          <day>10</day>
          <month>4</month>
          <year>2025</year>
        </date>
        <date date-type="accepted">
          <day>5</day>
          <month>5</month>
          <year>2025</year>
        </date>
      </history>
      <copyright-statement>©Nicholas Cummins, Lauren Louise White, Zahia Rahman, Catriona Lucas, Tian Pan, Ewan Carr, Faith Matcham, Johnny Downs, Richard Dobson, Thomas F Quatieri, Judith Dineley. Originally published in JMIR Research Protocols (https://www.researchprotocols.org), 22.07.2025.</copyright-statement>
      <copyright-year>2025</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Research Protocols, is properly cited. The complete bibliographic information, a link to the original publication on https://www.researchprotocols.org, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://www.researchprotocols.org/2025/1/e69431" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Speech and language biomarkers have the potential to provide regular, objective assessments of symptom severity in several neurological and mental health conditions, both in the clinic and remotely. However, speech and language characteristics within an individual are influenced by multiple variables that can make findings highly dependent on the chosen methodology and study cohort. These characteristics are often not reported adequately in studies investigating speech-based health assessment, which (1) hinders the progress of methodological speech research, (2) prevents replication, and (3) makes the definitive identification of robust biomarkers problematic.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>This study aims (1) to facilitate replicable speech research by presenting a transparent speech collection and feature extraction protocol and design checklist for other researchers to adapt and design for their own experiments and (2) to demonstrate in a pilot study the feasibility of implementing our example in-laboratory protocol that reduces multiple potential confounding factors in repeated recordings of healthy speech.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>We developed a collection and feature extraction protocol based on a thematic literature review to enable a controlled investigation of within-individual speech variability in healthy individuals. Our protocol comprises the elicitation of read speech, held vowels, and a picture description and extraction of 14 example features relevant to health. We collected speech using a freestanding condenser microphone, 3 smartphones, and a headset to enable a sensitivity analysis across different recording devices.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>We collected healthy speech data from 28 individuals 3 times in 1 day (the “day” cohort), with the same schedule repeated 8 to 11 weeks later, and from 25 individuals on 3 days within 1 week at fixed times (the “week” cohort). Participant characteristics collected included sex, age, native language, and voice use habits. Before each recording, we collected information on recent voice use, food and drink intake, and emotional state. Recording times were also documented. Analysis relating to exploring within-individual variability within the day and week cohorts, as well as the device-type sensitivity analysis, is ongoing, with findings expected later in 2025.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>The wide variability in speech data collection, processing, analysis, and reporting in research on speech’s use in clinical trials and practice is the motivation for this paper and the development of the speech curation protocol design checklist. Increased, more consistent reporting and justification of study protocols is urgently required to facilitate speech research replication and translation into clinical practice.</p>
        </sec>
        <sec sec-type="registered-report">
          <title>International Registered Report Identifier (IRRID)</title>
          <p>DERR1-10.2196/69431</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>speech</kwd>
        <kwd>voice</kwd>
        <kwd>replication</kwd>
        <kwd>longitudinal</kwd>
        <kwd>repeat recordings</kwd>
        <kwd>within-speaker variability</kwd>
        <kwd>health assessment</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <sec>
        <title>Speech as a Digital Marker of Health</title>
        <p>The linguistic and paralinguistic content of our speech contains rich information about our cognitive, neuromuscular, and respiratory functioning. There is a growing body of literature highlighting the potential of speech as an objective marker for disease diagnosis, monitoring, and prediction in a variety of clinical cohorts, including amyotrophic lateral sclerosis (ALS) [<xref ref-type="bibr" rid="ref1">1</xref>], Parkinson disease [<xref ref-type="bibr" rid="ref2">2</xref>-<xref ref-type="bibr" rid="ref4">4</xref>], psychosis [<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref6">6</xref>], and major depressive disorder [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref8">8</xref>] as has been summarized in several reviews [<xref ref-type="bibr" rid="ref9">9</xref>-<xref ref-type="bibr" rid="ref11">11</xref>]. Key advantages of recording speech for clinical applications include its noninvasive nature and the ability to conduct recordings both in the clinic and remotely using off-the-shelf consumer-grade audio equipment.</p>
      </sec>
      <sec>
        <title>Challenges and Confounders in Speech Biomarker Research</title>
        <p>Though speech has great potential as a signal, accurately detecting changes in speech driven by changes in health is challenging, and speech markers are yet to be used as an outcome measure in clinical trials or translated for clinical use. This is partly because speech is a multifaceted, complex, dynamic signal. Many speech changes associated with different health states can be subtle, forming one part of a measured signal that is also dictated by other speaker-specific factors and recording and analysis choices. There is a pressing need to understand, quantify, and adjust for the effect of such variables, as they can mask or even mimic the effect of health changes.</p>
        <p>Potential confounding factors related to the speaker include hormonal variations within the menstrual cycle [<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref13">13</xref>], fatigue [<xref ref-type="bibr" rid="ref14">14</xref>], voice use habits [<xref ref-type="bibr" rid="ref15">15</xref>-<xref ref-type="bibr" rid="ref17">17</xref>], emotion [<xref ref-type="bibr" rid="ref18">18</xref>], and hydration [<xref ref-type="bibr" rid="ref19">19</xref>]. Systematic changes with age, menopause, and medication use have also been reported [<xref ref-type="bibr" rid="ref20">20</xref>-<xref ref-type="bibr" rid="ref23">23</xref>]. A growing body of literature highlights the impact of methodological choices, including recording environment, hardware choices, digitization formats, and choice of extraction tools, on speech characteristics and subsequent health state analysis [<xref ref-type="bibr" rid="ref24">24</xref>-<xref ref-type="bibr" rid="ref29">29</xref>].</p>
        <p>Speech elicitation strategies are another important factor in speech-based health assessment. Common strategies include (1) structured tasks, such as reading passages and vocal function exercises such as sustained phonation [<xref ref-type="bibr" rid="ref30">30</xref>,<xref ref-type="bibr" rid="ref31">31</xref>]; (2) semistructured tasks, such as image description exercises [<xref ref-type="bibr" rid="ref32">32</xref>,<xref ref-type="bibr" rid="ref33">33</xref>]; and (3) minimally structured tasks and conversational speech that, for clinical cohorts, include clinical interviews [<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref11">11</xref>]. Choosing the right tasks is vital for ensuring the clinical validity and sensitivity of the extracted speech measures [<xref ref-type="bibr" rid="ref34">34</xref>]; each task can produce distinct acoustic, linguistic, and emotional content and be used for the targeted capture of different aspects of speech production [<xref ref-type="bibr" rid="ref34">34</xref>,<xref ref-type="bibr" rid="ref35">35</xref>].</p>
        <p>Less-structured tasks pose technical challenges, such as the need for accurate speech-to-text conversion and diarization to determine who was speaking what and when [<xref ref-type="bibr" rid="ref30">30</xref>]. Practice effects represent another potential confounder, where recorded speech changes due to repeated exposure to a task or activities [<xref ref-type="bibr" rid="ref36">36</xref>,<xref ref-type="bibr" rid="ref37">37</xref>]. Although expected in speech research, practice effects are rarely documented [<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref38">38</xref>].</p>
        <p>Despite an awareness of such effects, methodological details and important speaker characteristics in speech-based health assessment research are underreported in the literature. These factors can be unaccounted sources of variation that become particularly pertinent when effect sizes are small or context dependent [<xref ref-type="bibr" rid="ref39">39</xref>], which is often the case in health analyses of speech. This is of particular concern for remote data collection outside of laboratory settings where there are more degrees of freedom, for example, recording devices and geometry and the acoustic environment. This weakens replicability and hinders the development of robust methodology and tools; the discovery and verification of biomarkers; and, ultimately, clinical translation.</p>
      </sec>
      <sec>
        <title>Enhancing Speech Biomarker Research Through Methodology and Reporting</title>
        <p>The lack of established methods for data collection and reporting exacerbates the issues highlighted above [<xref ref-type="bibr" rid="ref40">40</xref>]. The Consensus Auditory-Perceptual Evaluation of Voice (CAPE-V) protocol [<xref ref-type="bibr" rid="ref41">41</xref>] and recommendations made by the American Speech-Language-Hearing Association Panel [<xref ref-type="bibr" rid="ref42">42</xref>] are helpful starting points. However, they have limited applicability in detecting subtle changes and the broader range of speech characteristics associated with, for example, mental health and neurological disorders recorded remotely and longitudinally. These recommendations were also developed specifically for in-laboratory speech pathology assessments.</p>
        <p>The Voiceome Study represents an attempt at standardization of longitudinal data collection for speech and language biomarker research [<xref ref-type="bibr" rid="ref43">43</xref>]. A key feature is its recommendation of 12 speech elicitation tasks, and the study highlights that these tasks produce distinct feature clusters. However, the authors do not describe the clinical relevance of these prompts or provide any evidence base justifying their inclusion. The implications for participant burden and associated protocol acceptance and adherence by participants are also not discussed, which is an important issue in data collection [<xref ref-type="bibr" rid="ref44">44</xref>]. The effects of the recording environment, recording time, hardware choices, and speech processing methods on the quality of extracted data are also not considered.</p>
        <p>In conclusion, the effect of speaker-related factors and methodological choices necessitates increased reporting and justification of methods used in speech and language biomarker research, including more well-considered protocol design. To begin addressing this, we report on our detailed study protocol for collecting in-laboratory repeated speech samples from healthy individuals. Our aim in publishing this protocol is to promote transparency and reproducibility as a key step toward increased research replication and more reliable identification and validation of speech biomarkers in this domain [<xref ref-type="bibr" rid="ref40">40</xref>,<xref ref-type="bibr" rid="ref45">45</xref>,<xref ref-type="bibr" rid="ref46">46</xref>]. This protocol is also a resource to replicate and adapt, including by researchers with less experience and specialist knowledge of speech processing.</p>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Overview</title>
        <p>Our pilot study builds on the literature to present an example protocol for speech corpora curation that reports and justifies methodological aspects for adaptation for other studies. We conducted this study with a specific research focus of assessing natural variability in individual voices and understanding the sensitivity of different recording devices to these variations. We collected and analyzed speech samples from healthy participants to avoid variability driven by pathology. Datasets of healthy individuals are also beneficial as baselines for comparison with clinical populations [<xref ref-type="bibr" rid="ref34">34</xref>,<xref ref-type="bibr" rid="ref47">47</xref>].</p>
        <p>The consideration of participant burden and the acceptability of the protocol to participants was a part of the research process, as these factors have important implications for recruitment and protocol adherence and therefore data quality and completeness. While our protocol was designed with a specific scientific goal in mind, the core methodological aspects are relevant for researchers collecting longitudinal data to investigate other questions in speech and language biomarker research. Such research will benefit from the minimization of variations in speech between recordings due to methodological factors and clear reporting of methodology. By presenting our methodological choices in this study, we would like to enable other researchers to adapt our protocol in their own work in healthy and clinical cohorts, thereby facilitating replicable speech research.</p>
      </sec>
      <sec>
        <title>Protocols for Investigating Within-Individual Speech Variability</title>
        <p>Most protocols in the literature have been a part of studies assessing localized vocal tract pathology and dysphonia, analyzing a small number of speech characteristics relevant to localized speech pathology, typically with modest-sized cohorts. Many of these studies were also conducted before remote recording and mobile devices were a consideration [<xref ref-type="bibr" rid="ref48">48</xref>,<xref ref-type="bibr" rid="ref49">49</xref>].</p>
        <p>Most recently, motivated toward speech pathology vocal tract assessment, Pierce et al [<xref ref-type="bibr" rid="ref50">50</xref>] assessed variability in repeated speech samples from healthy female participants in a remote recording study. Participants completed 1 supervised baseline recording and then recorded themselves 3 times each day for a week within prescribed intervals in a well-described protocol. The 45 participants read aloud 2 passages of text and produced sustained vowel phonations each time using a cardioid head-mounted microphone. Participants were advised to record in a quiet room with no tiling; however, adherence to this was not reported. The authors [<xref ref-type="bibr" rid="ref50">50</xref>] analyzed 32 speech features, observing significant voice production changes over a day but no significant changes across the week. They speculate that the “worse” voice they observed in the morning could be due to (1) voice production systems affected by physiological changes due to prolonged reclining while asleep and (2) low voice use before the participants completed their first recording. Other studies have demonstrated variations in voice based on the level of hydration in participants [<xref ref-type="bibr" rid="ref19">19</xref>,<xref ref-type="bibr" rid="ref50">50</xref>].</p>
        <p>Several studies motivated by mental and neurological disorder assessment have quantified within-speaker change, framed as test-retest reliability assessment. Feng et al [<xref ref-type="bibr" rid="ref51">51</xref>] recorded 40 healthy young adults twice, 2 to 3 days apart, in the same test room, completing 7 elicitation tasks in Mandarin. They observed that only half of the 56 speech features tested had moderate test-retest reliability, as estimated using intraclass correlation (ICC). Barnett et al [<xref ref-type="bibr" rid="ref52">52</xref>] retrospectively analyzed speech features of 46 healthy individuals recorded twice, months apart, reading aloud a “Bamboo” passage. They also observed only moderate test-retest reliability in half of the analyzed features. Stegmann et al [<xref ref-type="bibr" rid="ref53">53</xref>] reported an analysis of 22 healthy individuals recorded daily for 7 days and clinical cohorts with ALS (72 participants) and frontotemporal dementia (24 participants) recorded approximately a week apart. They reported that the test-retest reliability, also estimated using an ICC of commonly used speech features, was well below acceptable limits for clinical use.</p>
        <p>Each of these analyses highlights that we should expect some degree of variation in voice between repeated recordings of an individual. However, in each of these analyses, various potentially confounding methodological details such as consistency in recording time and acoustic conditions—and adherence to instructions in the unsupervised (“in-the-wild”) recordings—are not reported [<xref ref-type="bibr" rid="ref12">12</xref>-<xref ref-type="bibr" rid="ref29">29</xref>]. Therefore, at least in principle, measurement factors may be responsible for a proportion of the observed differences between repeated recordings of a given participant. An additional potential limitation of these works is the use of the same elicitation scripts in each recording. Increased speaker familiarity with the readings can result in practice effects [<xref ref-type="bibr" rid="ref36">36</xref>,<xref ref-type="bibr" rid="ref37">37</xref>], which could confound the assessment of within-individual speech variability [<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref38">38</xref>]. Finally, to the best of the authors’ knowledge, none of the aforementioned studies have provided data (either raw audio or extracted features).</p>
        <p>To address our chosen research question of within-individual speech variability, our protocol improves on these previous works in that we collected data at set documented times in a controlled, supervised environment and used multiple microphone types (<xref ref-type="table" rid="table1">Table 1</xref>). We also combined several structured and semistructured tasks to elicit both naive and practiced speech, with both spontaneous speech and scripted, fixed content to control for various factors. As a step toward our methodological goal of improving the reporting of methods in speech corpora curation, we present our protocol in detail in the following section.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Comparison of key methodological choices in protocols of studies observing within- and between-speaker variability.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="200"/>
            <col width="90"/>
            <col width="100"/>
            <col width="180"/>
            <col width="130"/>
            <col width="170"/>
            <col width="130"/>
            <thead>
              <tr valign="top">
                <td>Study</td>
                <td>Sample, n</td>
                <td>Cohort type</td>
                <td>Schedule</td>
                <td>Laboratory versus remote</td>
                <td>Microphone</td>
                <td>Speech type</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>This study</td>
                <td>28</td>
                <td>Healthy</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>3/d, twice in 8-11 wk</p>
                    </list-item>
                    <list-item>
                      <p>Fixed times</p>
                    </list-item>
                  </list>
                </td>
                <td>Laboratory</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Condenser</p>
                    </list-item>
                    <list-item>
                      <p>3 phones</p>
                    </list-item>
                    <list-item>
                      <p>1 headset</p>
                    </list-item>
                  </list>
                </td>
                <td>R<sup>a</sup>, SV<sup>b</sup>, and PD<sup>c</sup></td>
              </tr>
              <tr valign="top">
                <td>This study</td>
                <td>26</td>
                <td>Healthy</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>3 in 1 wk</p>
                    </list-item>
                    <list-item>
                      <p>Fixed days, fixed time</p>
                    </list-item>
                  </list>
                </td>
                <td>Laboratory</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Condenser</p>
                    </list-item>
                    <list-item>
                      <p>3 phones</p>
                    </list-item>
                    <list-item>
                      <p>1 headset</p>
                    </list-item>
                  </list>
                </td>
                <td>R, SV, and PD</td>
              </tr>
              <tr valign="top">
                <td>Garrett and Healey [<xref ref-type="bibr" rid="ref48">48</xref>], 1987</td>
                <td>20</td>
                <td>Healthy</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>3 in 1 d</p>
                    </list-item>
                  </list>
                </td>
                <td>Laboratory</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Miniature condenser</p>
                    </list-item>
                  </list>
                </td>
                <td>R</td>
              </tr>
              <tr valign="top">
                <td>Leong et al [<xref ref-type="bibr" rid="ref49">49</xref>], 2013</td>
                <td>18</td>
                <td>Healthy</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>10 in 30 d, fixed time interval</p>
                    </list-item>
                  </list>
                </td>
                <td>Laboratory</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Moving coil</p>
                    </list-item>
                  </list>
                </td>
                <td>R and SV</td>
              </tr>
              <tr valign="top">
                <td>Pierce et al [<xref ref-type="bibr" rid="ref50">50</xref>], 2021</td>
                <td>45</td>
                <td>Healthy</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>3/d in 1 wk</p>
                    </list-item>
                  </list>
                </td>
                <td>Remote</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Headset condenser</p>
                    </list-item>
                  </list>
                </td>
                <td>R and SV</td>
              </tr>
              <tr valign="top">
                <td>Barnett et al [<xref ref-type="bibr" rid="ref52">52</xref>], 2020</td>
                <td>46</td>
                <td>Healthy</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>2 in 3-6 mo</p>
                    </list-item>
                  </list>
                </td>
                <td>NES<sup>d</sup></td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>NES</p>
                    </list-item>
                  </list>
                </td>
                <td>R</td>
              </tr>
              <tr valign="top">
                <td>Stegmann et al [<xref ref-type="bibr" rid="ref53">53</xref>], 2020</td>
                <td>72</td>
                <td>ALS<sup>e</sup></td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Daily</p>
                    </list-item>
                    <list-item>
                      <p>1 wk</p>
                    </list-item>
                  </list>
                </td>
                <td>Remote</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>NES</p>
                    </list-item>
                  </list>
                </td>
                <td>R and SV</td>
              </tr>
              <tr valign="top">
                <td>Stegmann et al [<xref ref-type="bibr" rid="ref53">53</xref>], 2020</td>
                <td>22</td>
                <td>Healthy</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Daily</p>
                    </list-item>
                    <list-item>
                      <p>1 wk</p>
                    </list-item>
                  </list>
                </td>
                <td>Remote</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>NES</p>
                    </list-item>
                  </list>
                </td>
                <td>R and SV</td>
              </tr>
              <tr valign="top">
                <td>Stegmann et al [<xref ref-type="bibr" rid="ref53">53</xref>], 2020</td>
                <td>24</td>
                <td>ALS and dementia</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>2 in approximately 1 wk</p>
                    </list-item>
                  </list>
                </td>
                <td>NES</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>NES</p>
                    </list-item>
                  </list>
                </td>
                <td>R, SV, and PD</td>
              </tr>
              <tr valign="top">
                <td>Feng et al [<xref ref-type="bibr" rid="ref51">51</xref>], 2024</td>
                <td>40</td>
                <td>Healthy</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>2 in 2-3 d</p>
                    </list-item>
                  </list>
                </td>
                <td>Laboratory</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Condenser</p>
                    </list-item>
                  </list>
                </td>
                <td>R, SV, CS<sup>f</sup>, RS<sup>g</sup>, and DDK<sup>h</sup></td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>R: read, scripted speech.</p>
            </fn>
            <fn id="table1fn2">
              <p><sup>b</sup>SV: sustained vowels.</p>
            </fn>
            <fn id="table1fn3">
              <p><sup>c</sup>PD: picture description.</p>
            </fn>
            <fn id="table1fn4">
              <p><sup>d</sup>NES: not explicitly stated by authors.</p>
            </fn>
            <fn id="table1fn5">
              <p><sup>e</sup>ALS: amyotrophic lateral sclerosis.</p>
            </fn>
            <fn id="table1fn6">
              <p><sup>f</sup>CS: connected speech.</p>
            </fn>
            <fn id="table1fn7">
              <p><sup>g</sup>RS: repetition of heard speech.</p>
            </fn>
            <fn id="table1fn8">
              <p><sup>h</sup>DDK: diadochokinetic rate test.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Protocol</title>
        <sec>
          <title>Overview</title>
          <p>Herein, we describe our protocol, the methodological goal of which was to capture repeated speech samples with minimized measurement variability. We describe multiple methodological details relevant to wider speech and language biomarker research. To facilitate adaptation to new protocols addressing other research questions, we provide a checklist of key considerations (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>).</p>
          <p>This protocol’s primary scientific focus was to assess within-speaker nonpathological variation in speech over time. In the “day” cohort, we aimed to record healthy volunteers speaking (1) in the morning, afternoon, and early evening of a single day (day 1) and (2) repeatedly at the same times on a second day 8 to 11 weeks later (day 2). In the “week” cohort, our aim was to record healthy volunteers on 3 days in 1 week at the same time each day.</p>
        </sec>
        <sec>
          <title>Recruitment</title>
          <p>As a pilot study in which we sought to develop methodology, we chose to investigate variability in healthy individuals to avoid the additional variability introduced by pathology. Healthy cohorts are also valuable to establish baselines with which to compare pathological speech [<xref ref-type="bibr" rid="ref34">34</xref>,<xref ref-type="bibr" rid="ref47">47</xref>]. We recruited adult staff and students at the study institute, and local residents were recruited via advertisements in a research recruitment circular, institute email lists, social media, and physical flyers and posters. Potential participants were asked to read a web-based information sheet and complete a pre-enrollment screening form that repeated the eligibility criteria and collected contact details and sociodemographic data to facilitate the recruitment of a balanced cohort.</p>
          <p>We excluded individuals aged &#60;16 or &#62;65 years; those aged &#62;65 years were excluded to minimize speech effects associated with aging [<xref ref-type="bibr" rid="ref23">23</xref>]. We also excluded smokers; those with dyslexia; and individuals currently receiving treatment for any speech, auditory, mental, neurological, respiratory, or other health disorder that could impact their speech. In addition, we excluded nonnative English speakers unless they had a sufficient level of English proficiency to read an intermediate or advanced text aloud, selecting level B2 of the Common European Framework of Reference for Languages as a threshold [<xref ref-type="bibr" rid="ref54">54</xref>]. This was a compromise to ensure recruitment feasibility in a population with a considerable proportion of nonnative speakers in a strict timeline set by funder requirements, while minimizing confounders due to lack of reading and speaking proficiency for the specific speech elicitation tasks we were implementing.</p>
          <p>Inclusion and exclusion criteria were provided on the web to all individuals who considered participation.</p>
          <p>We regularly checked the cohort balance throughout recruitment to enable timely, targeted recruitment as needed. Sociodemographic groups that were underrepresented at pre-enrollment—male participants and participants aged &#62;30 years—were prioritized for follow-up and recruitment. After an initial round of advertising, in subsequent advertising, we advertised for male participants exclusively.</p>
          <p>Researchers emailed individuals to allocate them to 3 recording sessions in 1 day (day cohort) or week (week cohort) according to their availability and preference. Emails at each stage of participation used text templates individually adapted for more personable communication to encourage engagement. Each provisional participant’s recording sessions were scheduled, and they were emailed links to an electronic enrollment and consent form hosted on Qualtrics (Qualtrics International Inc) within 72 hours of the first session. This was to minimize the unnecessary collection of data from individuals who agreed to attend but subsequently decided not to participate.</p>
        </sec>
        <sec>
          <title>Data Collection Schedule</title>
          <p>Participants in the week cohort were scheduled for recording on a Monday, Wednesday, and Friday, fixed days that avoided the weekend to minimize confounders associated with different days of the week. Each participant in the week cohort was recorded at the same time on each day, to also minimize within-day variability between recordings [<xref ref-type="bibr" rid="ref50">50</xref>]. Participants in the week cohort were given the option to have their session start between 10 AM and 12 PM or 3 PM and 5 PM. Participants in the day cohort were scheduled for recording starting between 8 AM and 10 AM, 1 PM and 3 PM, and 5 PM and 7 PM. A minimum time between sessions of 3.5 hours was maintained to maximize the likelihood of measuring differences in speech with time of day. The same participants were scheduled to return for a second day of recording at least 8 weeks later. Day 2 of recording was scheduled for the same day of the week as day 1 and scheduled at the same times.</p>
        </sec>
        <sec>
          <title>Recording Session Procedure</title>
          <p>At each participant’s first session, researchers explained the recording procedure, and those who had not already done so before the session completed their enrollment and consent. The forms collected basic sociodemographic data, height (as a proxy of larynx length), information on the participants’ voice use habits in the previous 3 months, and their level of English, for nonnative speakers.</p>
          <p>Before beginning the study, the project team discussed the clearest and most consistent way to instruct participants. Our aim was to make participants feel as comfortable as possible and encourage natural speech and reproducible positioning during recording. The team had regular discussions as data collection progressed on any difficulties in this regard and ways to improve participant instruction.</p>
          <p>At the start of every recording session, participants were also asked to complete a prerecording questionnaire on Qualtrics that collected information on factors that might introduce between-recording variation in their speech. These included the times at which participants woke up and got out of bed, when they last ate and drank any liquid, the extent of their voice use that day before recording, how much sleep they had the previous night, and if they were experiencing any minor health issues that could affect their voice (<xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>). The prerecording questionnaire also included the Pick-A-Mood tool [<xref ref-type="bibr" rid="ref55">55</xref>]. Participants were also offered a drink of water at the start of each session; we recorded if they took this.</p>
          <p>Participants were seated as comfortably as possible on an office chair at a desk. Their speech was recorded with an Audio Technica 2020USB+ condenser microphone on a shock mount fitted to a Rylock foam pop filter on a tabletop stand (<xref rid="figure1" ref-type="fig">Figure 1</xref>). The microphone was operated using Audacity open-source software running on a Dell Latitude 7440 laptop (i5 core, 16 GB RAM) running Windows 11 (Microsoft Corporation). The microphone gain was set to a fixed value at the start of every session to maximize the signal-to-noise ratio while avoiding clipping. Participants were positioned 30 cm from the condenser microphone, the distance at which the device’s frequency response is specified. The chair’s height and left-right position were adjusted so the participant’s mouth was level with the pop filter and centered on the microphone. Participants were reminded not to move their chairs during the session. The participant and setup were surrounded by acoustic-absorbing foam and textiles.</p>
          <p>We positioned 3 smartphones (Apple iPhone 11 [released 2019], Samsung Galaxy S20 FE 5G [released 2020], and Motorola G5 [released 2017]) directly adjacent to and in the plane of the pop filter with their microphones positioned on the estimated vertical midline of the condenser microphone. These positions were fixed through all recordings and were comparable to if the participant held their phone in front of them as if in a video call [<xref ref-type="bibr" rid="ref7">7</xref>]. Smartphone positioning was checked before each session.</p>
          <p>Participants also wore a budget consumer office headset (Plantronics Blackwire 3220). The American Speech-Language-Hearing Association Panel recommends the use of headset microphones as the microphone-mouth distance can be fixed for the duration of a recording [<xref ref-type="bibr" rid="ref42">42</xref>]. Our headset was operated using Audacity run on a MacBook Air (Intel Core i5, 16-GB RAM; Apple, Inc), using a gain level fixed over all participants and sessions. Participants were instructed to position the headset microphone 2 finger widths from their cheek and to one side of their mouth, using a mirror as needed. The supervising researcher checked headset microphone positioning before recording.</p>
          <p>Before commencing the elicitation tasks, the participants were instructed to complete them at their own pace and to speak at a natural volume and pace. They were also instructed to switch their phones off or into flight mode or leave the phones outside the recording room to prevent interference with the recordings.</p>
          <p>At the beginning and end of each recording session, as well as between each exercise, the researcher running the session played an audio tone (an alarm tone on their mobile) to prompt the participant to proceed with the next speech task and to aid the manual separation of the tasks into individual audio files following the session.</p>
          <p>Following the completion of the speech tasks, the researcher assisted the participant in removing the headset and stopped each recording device. Participants were thanked for their time and reminded of their next recording session appointment, where applicable. At the end of each participant’s final recording session, researchers asked participants to consider completing a postparticipation questionnaire. Following their departure, the project team promptly emailed participants a link to the questionnaire and codes for shopping e-vouchers, compensating them for their time.</p>
          <fig id="figure1" position="float">
            <label>Figure 1</label>
            <caption>
              <p>Recording setup from the side (left) and the front (right).</p>
            </caption>
            <graphic xlink:href="resprot_v14i1e69431_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
        </sec>
        <sec>
          <title>Speech Elicitation Tasks</title>
          <p>Researchers provided participants with a varying combination of speech elicitation tasks in each session (<xref rid="figure2" ref-type="fig">Figure 2</xref>). Our choices balanced collecting several types of speech that, when combined, provide a variety of health-related indicators and sufficient amounts of each with participant burden and acceptance. A protocol with too many tasks, long recording sessions, or the elicitation of speech with personal content could deter potential participants and result in failures to complete all scheduled sessions.</p>
          <p>Session 1 began with a short, simple reading, the “North Wind and the Sun” [<xref ref-type="bibr" rid="ref56">56</xref>], as a form of warm-up exercise to help participants feel comfortable and settled before beginning the other tasks that would be the focus of our analyses. This was followed by a longer reading, “The Rainbow Passage” (long version) [<xref ref-type="bibr" rid="ref57">57</xref>]; a timed picture description (up to 2 minutes); and 3 repetitions each of 3 held vowels, /a/, /o/, and /i/. In sessions 2 and 3, participants completed the 2 readings from the first session and the held vowels and an additional long reading in each, one of “Your Rate of Oral Reading” [<xref ref-type="bibr" rid="ref58">58</xref>] or “Comma Gets a Cure” [<xref ref-type="bibr" rid="ref59">59</xref>]. They also completed a new picture description in each of sessions 2 and 3. The elicitation task order was varied between sessions of each participant and between participants to avoid introducing systematic biases with specific tasks.</p>
          <fig id="figure2" position="float">
            <label>Figure 2</label>
            <caption>
              <p>Speech elicitation overview. Our protocol elicited nonpracticed long-scripted speech in each session, plus practiced short and long readings, except in session 1. Participants described a different picture and produced held vowels in each session. The task order was varied between participants and between sessions.</p>
            </caption>
            <graphic xlink:href="resprot_v14i1e69431_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
          <p>The scripted tasks provided standardized linguistic content. Repetitions of the “North Wind and the Sun” and “The Rainbow Passage” enable direct comparison of paralinguistic features for the same speech between sessions, although these repeated recordings will also be affected by practice effects. Recordings of “Your Rate of Oral Reading” and “Comma Gets a Cure” provided set linguistic content that was not subject to practice effects in the week study and in day 1 of the day study, as they were new to the participant.</p>
          <p>We selected “Your Rate of Oral Reading” and “Comma Gets a Cure” as, along with “The Rainbow Passage,” the 3 readings have a similar lexical and linguistic complexity and length, combined with a similar phonetic balance in the literature [<xref ref-type="bibr" rid="ref30">30</xref>,<xref ref-type="bibr" rid="ref31">31</xref>]. Therefore, we deemed them suitable for quantifying speech variability between sessions while avoiding practice effects.</p>
          <p>“The Rainbow Passage” and “Your Rate of Oral Reading” were selected as factual texts rather than stories to minimize the likelihood of participants using a “storytelling” voice and therefore maximize the likelihood of them speaking in their natural voices. This choice was informed by our observations in the mobile health study, Remote Assessment of Disease and Relapse—Major Depressive Disorder (RADAR-MDD) [<xref ref-type="bibr" rid="ref60">60</xref>], where participants tended to use emphasis and be expressive in reading a story. Our choice of “Comma Gets a Cure” was a compromise; it is a story but has desirable lexical and phonetic characteristics that have been well documented in the literature [<xref ref-type="bibr" rid="ref30">30</xref>,<xref ref-type="bibr" rid="ref31">31</xref>].</p>
          <p>Picture description tasks provided spontaneous speech. We used 3 images: the Cookie Theft (original version), the Cat in the Tree, and the Birthday Cake [<xref ref-type="bibr" rid="ref33">33</xref>]. All pictures are black and white designs, depicting a simple story situation with a central focus and interacting elements. Typically used in speech assessment in neurodegenerative disorders, for example, Alzheimer disease, to investigate cognitive characteristics via the linguistic content of an individual’s speech [<xref ref-type="bibr" rid="ref32">32</xref>], picture descriptions also have value in paralinguistic analysis [<xref ref-type="bibr" rid="ref61">61</xref>].</p>
          <p>Held vowel sounds provided standardized acoustic signals without any lexical, structural, or linguistic effects to account for, suitable for measurement of perturbation and quality measures [<xref ref-type="bibr" rid="ref62">62</xref>,<xref ref-type="bibr" rid="ref63">63</xref>]. The choice of elicitation tasks was advantageous from a data privacy perspective, as they did not elicit the disclosure of personal information.</p>
        </sec>
        <sec>
          <title>Data Quality Control Checks, Storage, and Preparation</title>
          <p>After each recording session, all audio files were named in the following format: ParticipantID_Device_Day_Session. They were then uploaded to a secure Microsoft SharePoint site maintained by King’s College London, accessible only by project staff.</p>
          <p>The researcher running the session also completed a data quality control log that detailed (1) the start time of each session, using the timestamp on the audio files; (2) if the participant drank any water during the session; (3) any interruptions or participant behavior that could affect the recording content or quality, for example, the participant moving their chair and subsequent chair repositioning; (4) any extraneous noise during the session; (5) any issues completing the vowel task; (6) any participant difficulties completing the tasks; and (7) any other event or observation not covered by the other fields that could affect the recording.</p>
          <p>The researcher then checked that (1) all audio files were uploaded into the correct participant and session folders, (2) each file contained recordings of the correct speaker, and (3) all tasks were completed in the stated order. The researcher also noted any additional audible issues in the data not previously captured in the quality control log.</p>
          <p>Recordings of individual elicitation tasks were then separated into individual files using Audacity. File names were appended to include which task they contained with the following naming convention: ParticipantID_Device_Day_Session_Task.</p>
        </sec>
        <sec>
          <title>Preliminary Feature Extraction</title>
          <p>We extracted 14 example features from condenser microphone recordings of “The Rainbow Passage.” The purpose of extracting these features as part of our protocol development is to demonstrate the feasibility of our methodology pipeline. We present features captured with the condenser microphone only as our benchmark device, as example values that are not subject to any preprocessing that could erroneously affect the values extracted. These features were chosen as they are commonly used in speech-health research, representing timing and fluency characteristics and the speech production subsystems of respiration, phonation, and articulation (<xref ref-type="table" rid="table2">Table 2</xref>).</p>
          <table-wrap position="float" id="table2">
            <label>Table 2</label>
            <caption>
              <p>Speech features extracted from the recordings to generate normative values.</p>
            </caption>
            <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
              <col width="30"/>
              <col width="350"/>
              <col width="0"/>
              <col width="620"/>
              <thead>
                <tr valign="top">
                  <td colspan="3">Features</td>
                  <td>Description</td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td colspan="4">
                    <bold>Timing and fluency</bold>
                  </td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Duration, second</td>
                  <td colspan="2">Length of recording</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Speaking rate, syllables second<sup>–1</sup></td>
                  <td colspan="2">Total syllables divided by duration</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Articulation rate, syllables second<sup>–1</sup></td>
                  <td colspan="2">Total syllables divided by total speaking time</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Pause rate, second<sup>–1</sup></td>
                  <td colspan="2">Total pauses divided by duration</td>
                </tr>
                <tr valign="top">
                  <td colspan="4">
                    <bold>Respiration</bold>
                  </td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Intensity (mean), dB</td>
                  <td colspan="2">Loudness of speech signal</td>
                </tr>
                <tr valign="top">
                  <td colspan="4">
                    <bold>Phonation</bold>
                  </td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Pitch (mean), Hz</td>
                  <td colspan="2">Auditory perceived tone</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Pitch (SD), semitones</td>
                  <td colspan="2">SD of pitch</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Harmonic:noise ratio (mean), dB</td>
                  <td colspan="2">Extent to which harmonic structures are affected by noise</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Spectral slope (mean)</td>
                  <td colspan="2">Gradient of the voiced spectrum</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Cepstral peak prominence (mean), dB</td>
                  <td colspan="2">Amplitude of cepstral peak, relative to a regression line through the cepstrum</td>
                </tr>
                <tr valign="top">
                  <td colspan="4">
                    <bold>Articulatory</bold>
                  </td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>First formant frequency (mean), Hz</td>
                  <td colspan="2">First resonant frequency of the vocal tract</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Second formant frequency (mean), Hz</td>
                  <td colspan="2">Second resonant frequency of the vocal tract</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Gravity (mean), Hz</td>
                  <td colspan="2">Center frequency of the narrow band spectrum</td>
                </tr>
                <tr valign="top">
                  <td>
                    <break/>
                  </td>
                  <td>Deviation (mean), Hz</td>
                  <td colspan="2">Spread of frequencies around the spectral gravity</td>
                </tr>
              </tbody>
            </table>
          </table-wrap>
          <p>Timing and fluency features have previously been demonstrated to contain important clinical information for conditions including depression [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref64">64</xref>], ALS [<xref ref-type="bibr" rid="ref65">65</xref>], and Parkinson disease [<xref ref-type="bibr" rid="ref66">66</xref>]. Respiration and phonation features are widely used in speech-based mental health analysis [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref10">10</xref>]. Articulation features have been included as they indicate changes in speech intelligibility and speech-motor control and have been proposed as markers for a variety of health conditions [<xref ref-type="bibr" rid="ref9">9</xref>-<xref ref-type="bibr" rid="ref11">11</xref>,<xref ref-type="bibr" rid="ref67">67</xref>].</p>
          <p>To extract these features, we first used Parselmouth [<xref ref-type="bibr" rid="ref68">68</xref>] to convert all audio files to single-channel 16-kHz waveform audio file format files with 16-bit resolution. Our acoustic features were extracted at two levels: (1) suprasegmentally—calculated over the entire reading—and (2) for individual occurrences of open /a/ vowels of at least 50 ms duration from “The Rainbow Passage.” For the /a/ vowels, we extracted the features per identified instance of the vowel and calculated the mean per recording over all instances. We provide suprasegmental acoustic features, as this is a common approach in paralinguistic analyses [<xref ref-type="bibr" rid="ref69">69</xref>]. Extraction purely from /a/ vowels, in contrast, provides more granular, controlled acoustic measures of speech. The use of the open /a/ vowel has been recommended for more reliable extraction of voice quality measures [<xref ref-type="bibr" rid="ref70">70</xref>]. We report results in terms of the median (IQR) for the day and week studies separately, providing a resource of normative values for use in future analysis.</p>
          <p>As a more realistic and affordable approach toward clinical research, we implemented an automated approach to identify instances of /a/ in our files. First, we transcribed our files offline using the OpenAI whisper-base.en model [<xref ref-type="bibr" rid="ref71">71</xref>], an open-source automatic speech recognition (ASR) tool, which has been demonstrated, in independent testing, to have an average word error rate of 12.8% calculated over 9 different ASR test sets [<xref ref-type="bibr" rid="ref72">72</xref>]. We then performed a forced alignment of the resulting transcripts using the Montreal Forced Aligner (MFA) [<xref ref-type="bibr" rid="ref73">73</xref>] and English MFA acoustic model (version 2.0.0a). After identifying the vowels in the phonetic alignment, we extracted the features per identified vowel and then took the per-participant, per-session mean of these features to form our final representation. We performed spot-checks of the accuracy of these alignments, dictated by timing and budgetary constraints. Forced alignment software is generally considered reliable, giving near human-level alignments [<xref ref-type="bibr" rid="ref74">74</xref>,<xref ref-type="bibr" rid="ref75">75</xref>]. We conducted cursory spot-checks to assess the reliability of alignments rather than performing a more formal analysis. Due to time and budget constraints, these checks were conducted “by ear” in Praat using the MFA-generated TextGrids to isolate the identified vowels. No specific alignment scores are provided as we lacked annotated ground truth data.</p>
          <p>Features were extracted using Parselmouth [<xref ref-type="bibr" rid="ref68">68</xref>], an open-source Python library that enables the use of Praat, a software package for speech analysis [<xref ref-type="bibr" rid="ref76">76</xref>]. Speech timing features are extracted using intensity thresholds [<xref ref-type="bibr" rid="ref77">77</xref>]. All prosodic, phonation, and articulatory measures were extracted using default Praat settings, except for the extraction of F0, which followed the 2-step approach recommended in the study by Vogel et al [<xref ref-type="bibr" rid="ref64">64</xref>], and cepstral peak prominence, which followed settings recommended in the study by Murton et al [<xref ref-type="bibr" rid="ref78">78</xref>].</p>
        </sec>
        <sec>
          <title>Summary</title>
          <p>Our protocol is unique (<xref ref-type="table" rid="table1">Table 1</xref>): it collects data using multiple microphone types in a controlled environment to control for and minimize variability attributable to hardware, setup, and acoustic conditions. The speech elicitation prompts enable the collection of acoustically rich and varied content while (1) containing a core amount of fixed phonetic content to enable comparable analyses and (2) introducing new readings in each session to minimize potentially confounding practice effects. We collated a list of factors we considered in designing our protocol that may be used as a framework by other researchers designing speech collection protocols (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>).</p>
        </sec>
      </sec>
      <sec>
        <title>Ethical Considerations</title>
        <p>The pilot study received approval from the Research Ethics Committee of King’s College London (reference LRS/DP-22/23-36194). As part of our pre-enrollment screening and again in our consent process, participants were asked to confirm their understanding of these criteria and reminded that they should not take part if they did not meet any of them. We did not collect any information in relation to these criteria for ethical reasons and as Article 5(1)(c) of the General Data Protection Regulation stipulates the collection of minimal necessary data [<xref ref-type="bibr" rid="ref79">79</xref>]. Detailed health information was superfluous to our study aims and of a sensitive personal nature.</p>
        <p>Upon completion of the recordings, participants were compensated for their time with e-vouchers redeemable in several shops. For the day cohort participants, these comprised £20 (US $25) for 3 sessions (day 1) and £60 (US $75) for 3 sessions (day 2) to encourage completion of both days. The participants of the week study received £40 (US $50) for 3 sessions.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <p>Data collection and preliminary analysis were funded from April 1, 2023, to March 31, 2024. Recruitment began on June 5, 2023. Pre-enrollment screening to exclude any hearing, speaking, neurological, or mental health disorders that might affect their speech was completed by 141 participants (<xref rid="figure3" ref-type="fig">Figure 3</xref>). In total, 28 and 26 participants enrolled in the day and week studies, respectively (<xref ref-type="table" rid="table3">Table 3</xref>). One participant in the week study completed 2 of the 3 recording sessions due to illness (<xref rid="figure3" ref-type="fig">Figure 3</xref>). Day 1 recordings began on June 14, 2023, and were completed on August 10, 2023. Day 2 recording began on August 9, 2023, and was completed on October 5, 2023. Week recordings commenced on June 19, 2023, and were completed on October 6, 2023. At the time of submission of this manuscript, analysis was in progress. We plan to submit our core analysis later in 2025.</p>
      <p>In the day study, the median recording start times for the morning sessions were 9:12 and 9:11 for days 1 and 2, respectively (<xref ref-type="supplementary-material" rid="app3">Multimedia Appendices 3</xref> and <xref ref-type="supplementary-material" rid="app4">4</xref>). The median afternoon and evening recording start times for both days were 14:05 and 18:04, respectively. In the week study, the most common recording slots were 10 AM to 11 AM and 12 PM to 1 PM, with 5 participants each (<xref ref-type="supplementary-material" rid="app5">Multimedia Appendix 5</xref>). Recording times for each participant were consistent across the Monday, Wednesday, and Friday sessions, with differences in start times all &#60;30 (median 13, range 3-22) minutes.</p>
      <p>In total, the study comprised 245 recording sessions and produced 1225 audio files from 5 recording devices, totaling 169 GB of data. Using Audacity, we separated the readings of “The Rainbow Passage” from the condenser microphone and extracted our 14 example speech features using the methodology previously outlined. These values are provided for the day and week study participant groups (<xref ref-type="table" rid="table4">Table 4</xref>).</p>
      <p>The focus of this paper is methodology development. Therefore, an analysis of within-individual speech variation and the ability of different devices to capture this variation is beyond the scope of this paper; it will be reported in future publications.</p>
      <fig id="figure3" position="float">
        <label>Figure 3</label>
        <caption>
          <p>Strengthening the Reporting of Observational Studies in Epidemiology (STROBE) flowchart describing participant recruitment, enrollment, and completion. *Pre-enrollment was completed via Qualtrics, email, and face-to-face communication.</p>
        </caption>
        <graphic xlink:href="resprot_v14i1e69431_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
      </fig>
      <table-wrap position="float" id="table3">
        <label>Table 3</label>
        <caption>
          <p>Participant characteristics.</p>
        </caption>
        <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
          <col width="30"/>
          <col width="530"/>
          <col width="0"/>
          <col width="220"/>
          <col width="0"/>
          <col width="220"/>
          <thead>
            <tr valign="top">
              <td colspan="3">Characteristics</td>
              <td colspan="2">Day (n=28)</td>
              <td>Week (n=26)</td>
            </tr>
          </thead>
          <tbody>
            <tr valign="top">
              <td colspan="6">
                <bold>Sex, n (%)</bold>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Female</td>
              <td colspan="2">15 (54)</td>
              <td colspan="2">17 (65)</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Male</td>
              <td colspan="2">13 (46)</td>
              <td colspan="2">9 (35)</td>
            </tr>
            <tr valign="top">
              <td colspan="3">Age (y), median (IQR)</td>
              <td colspan="2">26 (23-34)</td>
              <td>29 (24-34)</td>
            </tr>
            <tr valign="top">
              <td colspan="3">Height (m), median (IQR)</td>
              <td colspan="2">1.70 (1.63-1.79)</td>
              <td>1.71 (1.63-1.78)</td>
            </tr>
            <tr valign="top">
              <td colspan="6">
                <bold>Ethnicity, n (%)</bold>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Asian or Asian British (Indian, Bangladeshi, and Chinese)</td>
              <td colspan="2">5 (18)</td>
              <td colspan="2">7 (27)</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Arab</td>
              <td colspan="2">0 (0)</td>
              <td colspan="2">1 (4)</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Black, African, Caribbean, and Black British (Caribbean)</td>
              <td colspan="2">1 (4)</td>
              <td colspan="2">1 (4)</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>White (United Kingdom and Ireland)</td>
              <td colspan="2">14 (50)</td>
              <td colspan="2">9 (35)</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>White, other</td>
              <td colspan="2">3 (11)</td>
              <td colspan="2">5 (19)</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Mixed or multiple ethnic groups</td>
              <td colspan="2">4 (14)</td>
              <td colspan="2">1 (4)</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Other ethnic groups</td>
              <td colspan="2">1 (4)</td>
              <td colspan="2">2 (8)</td>
            </tr>
            <tr valign="top">
              <td colspan="6">
                <bold>Language status, n (%)</bold>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Native English speaker</td>
              <td colspan="2">24 (86)</td>
              <td colspan="2">17 (65)</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Non–native English speaker<sup>a</sup></td>
              <td colspan="2">4 (14)</td>
              <td colspan="2">9 (35)</td>
            </tr>
            <tr valign="top">
              <td colspan="6">
                <bold>Voice use in the 3 mo before recording, n (%)</bold>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Low</td>
              <td colspan="2">1 (4)</td>
              <td colspan="2">2 (8)</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Intermittent</td>
              <td colspan="2">7 (25)</td>
              <td colspan="2">6 (23)</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Regular</td>
              <td colspan="2">19 (68)</td>
              <td colspan="2">15 (58)</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>High</td>
              <td colspan="2">1 (4)</td>
              <td colspan="2">3 (12)</td>
            </tr>
            <tr valign="top">
              <td colspan="6">
                <bold>Minor health issues, n (%)</bold>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Allergies</td>
              <td colspan="2">2 (7)</td>
              <td colspan="2">4 (15)</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Sinusitis</td>
              <td colspan="2">1 (4)</td>
              <td colspan="2">0 (0)</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Acid reflux</td>
              <td colspan="2">1 (4)</td>
              <td colspan="2">1 (4)</td>
            </tr>
          </tbody>
        </table>
        <table-wrap-foot>
          <fn id="table3fn1">
            <p><sup>a</sup>All B2 levels or above per the Common European Framework of Reference for Languages [<xref ref-type="bibr" rid="ref54">54</xref>].</p>
          </fn>
        </table-wrap-foot>
      </table-wrap>
      <table-wrap position="float" id="table4">
        <label>Table 4</label>
        <caption>
          <p>Normative median (IQR) feature values for a set of 14 example features extracted from condenser microphone recordings of “The Rainbow Passage.”<sup>a</sup></p>
        </caption>
        <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
          <col width="30"/>
          <col width="270"/>
          <col width="0"/>
          <col width="230"/>
          <col width="0"/>
          <col width="230"/>
          <col width="0"/>
          <col width="240"/>
          <thead>
            <tr valign="top">
              <td colspan="3">Feature extraction level<sup>b</sup></td>
              <td colspan="2">Week (n=26), median (IQR)</td>
              <td colspan="2">Day 1 (n=28), median (IQR)</td>
              <td>Day 2 (n=28), median (IQR)</td>
            </tr>
          </thead>
          <tbody>
            <tr valign="top">
              <td colspan="8">
                <bold>Duration (s)</bold>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Suprasegmentally</td>
              <td colspan="2">122 (110 to 136)</td>
              <td colspan="2">114 (104 to 126)</td>
              <td colspan="2">113 (103 to 123)</td>
            </tr>
            <tr valign="top">
              <td colspan="8">
                <bold>Speaking rate (syllables s<sup>–1</sup>)</bold>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Suprasegmentally</td>
              <td colspan="2">3.69 (3.40 to 3.99)</td>
              <td colspan="2">3.72 (3.53 to 4.06)</td>
              <td colspan="2">3.72 (3.49 to 4.00)</td>
            </tr>
            <tr valign="top">
              <td colspan="8">
                <bold>Articulation rate (syllables s<sup>–1</sup>)</bold>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Suprasegmentally</td>
              <td colspan="2">4.63 (4.32 to 4.91)</td>
              <td colspan="2">4.65 (4.47 to 4.87)</td>
              <td colspan="2">4.65 (4.48 to 4.91)</td>
            </tr>
            <tr valign="top">
              <td colspan="8">
                <bold>Pause rate (s<sup>–1</sup>)</bold>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Suprasegmentally</td>
              <td colspan="2">0.233 (0.205 to 0.264)</td>
              <td colspan="2">0.215 (0.191 to 0.255)</td>
              <td colspan="2">0.214 (0.187 to 0.251)</td>
            </tr>
            <tr valign="top">
              <td colspan="8">
                <bold>Pitch mean (Hz)</bold>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Suprasegmentally</td>
              <td colspan="2">187 (120 to 202)</td>
              <td colspan="2">146 (111 to 194)</td>
              <td colspan="2">154 (112 to 192)</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Automatically identified /a/ vowels</td>
              <td colspan="2">183 (124 to 203)</td>
              <td colspan="2">150 (114 to 189)</td>
              <td colspan="2">157 (115 to 196)</td>
            </tr>
            <tr valign="top">
              <td colspan="8">
                <bold>Pitch SD (Hz)</bold>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Suprasegmentally</td>
              <td colspan="2">2.91 (2.54 to 3.57)</td>
              <td colspan="2">2.78 (2.35 to 3.79)</td>
              <td colspan="2">2.89 (2.41 to 3.69)</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Automatically identified /a/ vowels</td>
              <td colspan="2">0.37 (0.26 to 0.59)</td>
              <td colspan="2">0.38 (0.22 to 0.56)</td>
              <td colspan="2">0.35 (0.21 to 0.58)</td>
            </tr>
            <tr valign="top">
              <td colspan="8">
                <bold>Intensity (dB)</bold>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Suprasegmentally</td>
              <td colspan="2">68.7 (67.1 to 70.1)</td>
              <td colspan="2">68.2 (66.7 to 69.8)</td>
              <td colspan="2">68.5 (66.8 to 69.9)</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Automatically identified /a/ vowels</td>
              <td colspan="2">72.7 (70.8 to 74.5)</td>
              <td colspan="2">72.6 (70.2 to 74.6)</td>
              <td colspan="2">73.0 (70.7 to 74.7)</td>
            </tr>
            <tr valign="top">
              <td colspan="8">
                <bold>Harmonic:noise ratio (dB)</bold>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Suprasegmentally</td>
              <td colspan="2">10.43 (7.28 to 12.02)</td>
              <td colspan="2">8.33 (6.39 to 9.85)</td>
              <td colspan="2">8.34 (6.69 to 9.97)</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Automatically identified /a/ vowels</td>
              <td colspan="2">8.57 (4.62 to 10.61)</td>
              <td colspan="2">6.44 (4.38 to 7.97)</td>
              <td colspan="2">5.58 (3.72 to 8.33)</td>
            </tr>
            <tr valign="top">
              <td colspan="8">
                <bold>Spectral slope</bold>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Suprasegmentally</td>
              <td colspan="2">–17.0 (–18.6 to –15.6)</td>
              <td colspan="2">–16.4 (–17.7 to –15.4)</td>
              <td colspan="2">–16.4 (–18.0 to –15.1)</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Automatically identified /a/ vowels</td>
              <td colspan="2">–20.5 (–22.0 to –19.1)</td>
              <td colspan="2">–19.8 (–21.6 to –18.7)</td>
              <td colspan="2">–19.7 (–22.1 to –18.7)</td>
            </tr>
            <tr valign="top">
              <td colspan="8">
                <bold>Cepstral peak prominence (dB)</bold>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Suprasegmentally</td>
              <td colspan="2">10.14 (9.56 to 10.74)</td>
              <td colspan="2">9.96 (9.43 to 10.69)</td>
              <td colspan="2">10.17 (9.36 to 10.77)</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Automatically identified /a/ vowels</td>
              <td colspan="2">13.91 (12.84 to 15.03)</td>
              <td colspan="2">13.26 (11.76 to 15.03)</td>
              <td colspan="2">13.50 (11.91 to 14.82)</td>
            </tr>
            <tr valign="top">
              <td colspan="8">
                <bold>First formant (Hz)</bold>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Suprasegmentally</td>
              <td colspan="2">477 (449 to 504)</td>
              <td colspan="2">482 (454 to 507)</td>
              <td colspan="2">475 (450 to 505)</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Automatically identified /a/ vowels</td>
              <td colspan="2">648 (577 to 698)</td>
              <td colspan="2">639 (580 to 674)</td>
              <td colspan="2">628 (571 to 678)</td>
            </tr>
            <tr valign="top">
              <td colspan="8">
                <bold>Second formant (Hz×10<sup>3</sup>)</bold>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Suprasegmentally</td>
              <td colspan="2">1.65 (1.56 to 1.72)</td>
              <td colspan="2">1.57 (1.49 to 1.64)</td>
              <td colspan="2">1.57 (1.48 to 1.63)</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Automatically identified /a/ vowels</td>
              <td colspan="2">1.33 (1.25 to 1.43)</td>
              <td colspan="2">1.26 (1.17 to 1.36)</td>
              <td colspan="2">1.25 (1.17 to 1.37)</td>
            </tr>
            <tr valign="top">
              <td colspan="8">
                <bold>Spectral gravity (Hz)</bold>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Suprasegmentally</td>
              <td colspan="2">417 (362 to 465)</td>
              <td colspan="2">453 (388 to 487)</td>
              <td colspan="2">433 (367 to 497)</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Automatically identified /a/ vowels</td>
              <td colspan="2">613 (511 to 687)</td>
              <td colspan="2">651 (564 to 706)</td>
              <td colspan="2">621 (544 to 696)</td>
            </tr>
            <tr valign="top">
              <td colspan="8">
                <bold>Spectral deviation (Hz)</bold>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Suprasegmentally</td>
              <td colspan="2">330 (286 to 389)</td>
              <td colspan="2">363 (324 to 398)</td>
              <td colspan="2">357 (320 to 393)</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Automatically identified /a/ vowels</td>
              <td colspan="2">361 (310 to 426)</td>
              <td colspan="2">370 (342 to 414)</td>
              <td colspan="2">362 (331 to 407)</td>
            </tr>
          </tbody>
        </table>
        <table-wrap-foot>
          <fn id="table4fn1">
            <p><sup>a</sup>Feature definitions are provided in <xref ref-type="table" rid="table2">Table 2</xref>.</p>
          </fn>
          <fn id="table4fn2">
            <p><sup>b</sup>Features are extracted suprasegmentally or from automatically identified /a/ vowels in readings of “The Rainbow Passage” recorded with a condenser microphone that did not apply any preprocessing.</p>
          </fn>
        </table-wrap-foot>
      </table-wrap>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>We developed a protocol and checklist for study design, implementation, and reporting of repeated speech sample recording in the same individuals over time (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). The metadata reporting, scheduling, device choices, elicitation tasks, data storage and preparation, and feature extraction provide an adaptable template for other researchers collecting repeated speech samples.</p>
        <p>Our specific research focus was to gain insights into speech variation over the course of a single day and week while controlling for practice effects. The protocol is unique among studies exploring within- and between-speaker variability in a nonpathological population in the variety of speech captured and the number and type of recording devices. This allows us to observe how within-individual variability is captured by mobile devices. Analysis of these aspects will be presented in future work.</p>
        <p>The protocol also enabled us to generate a small but well-described dataset of normative values, which are underreported in the speech biomarker literature [<xref ref-type="bibr" rid="ref34">34</xref>,<xref ref-type="bibr" rid="ref47">47</xref>], of 14 example features commonly used in speech-health research. The insights resulting from this work provide us with a foundation for the design of future data collection and interpretation in clinical cohorts.</p>
      </sec>
      <sec>
        <title>Limitations and Lessons Learned</title>
        <p>The design and implementation of this protocol provided insights that will inform the methodology of future studies.</p>
        <sec>
          <title>Protocol Development</title>
          <p>The design of this protocol was made challenging by the absence of suitable established collection and reporting protocols [<xref ref-type="bibr" rid="ref40">40</xref>]. Discipline silos are a core challenge in speech-based health assessment literature that hinders protocol development and reporting. There is a lack of teams integrating clinical-facing researchers who collect data and researchers who process and analyze the data, who are typically from engineering or computer science backgrounds. This can lead to gaps in the collection and reporting of speaker factors and methodological choices that can influence the measurement of recorded speech. Consistent reporting of the effects of speaker-related, recording, and processing factors is urgently required to aid the development of robust speech collection protocols and processing pipelines [<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref80">80</xref>] and to inform the statistical design of speech studies in clinical cohorts [<xref ref-type="bibr" rid="ref47">47</xref>].</p>
        </sec>
        <sec>
          <title>Speech Elicitation Strategies</title>
          <p>The choice of speech elicitation strategies in any protocol is a trade-off between competing factors that include the need to capture indicators of different aspects of speech production, driven by the research question [<xref ref-type="bibr" rid="ref34">34</xref>,<xref ref-type="bibr" rid="ref35">35</xref>]; participant burden and adherence; and strategy-dependent technical challenges in extracting features from recordings. We prioritized a combination of strategies to elicit indicators of several aspects of speech production relevant to mental, neurological, and respiratory health that were quick and easy for participants and did not require them to disclose personal information. These may not be suitable for every biomarker study and application.</p>
        </sec>
        <sec>
          <title>Resource Requirement</title>
          <p>Although participant numbers were small (n=54), the resources required to implement all steps of the protocol—recruitment, data collection, preprocessing of audio files, and feature extraction—were substantial. Data collection and audio preprocessing were particularly labor intensive. Our 245 recording sessions extended from 8 AM to 7:30 PM. We preferred to run these sessions with 2 researchers present to help minimize the likelihood of errors, although this was often not logistically feasible. Regarding preprocessing, we estimate that splitting the 1225 audio files into their individual tasks required close to 720 hours of researcher time. This highlights the need for more efficient recording and annotation techniques to recruit large, well-powered studies.</p>
          <p>One way to increase dataset size and minimize researcher burden when implementing a similar protocol in the future could be to collect data remotely using PCs or smart devices using collection platforms such as RADAR-base [<xref ref-type="bibr" rid="ref81">81</xref>]. Such a solution does not require researcher time to run the recording sessions, and apps can be easily designed to record different speech elicitation activities individually, saving manual segmentation time. However, remote studies are more likely to result in missing data, incorrectly completed tasks, and more variable data quality [<xref ref-type="bibr" rid="ref60">60</xref>,<xref ref-type="bibr" rid="ref82">82</xref>].</p>
          <p>Participant noncompliance, particularly in clinical cohorts, is a further concern in remote studies. Pierce et al [<xref ref-type="bibr" rid="ref50">50</xref>] reported high adherence of 92% of their healthy participants to the prescribed recording times over 7 days. Over collection intervals of up to 18 months, we observed clinical cohort completion rates of 50% (IQR 21%-74%) and 41% (IQR 13%-67%) for the scripted and free-speaking speech tasks, respectively, in RADAR-MDD, where speech was one of &#62;10 data streams. Within the sparse longitudinal literature, the Voiceome Study is a further example, where only 21% of participants completed ≥2 recordings [<xref ref-type="bibr" rid="ref43">43</xref>]. Therefore, there is a need to understand participant motivation and functionality concerns in mobile data collection.</p>
        </sec>
        <sec>
          <title>Recruitment Balance</title>
          <p>Before beginning recording, we aimed to recruit a 50/50 balance of sex at birth. However, we quickly learned that this required a concerted effort to achieve in the fixed time that we had to complete our work, dictated by funder requirements. In total, 101 women completed pre-enrollment forms versus 39 men, which was only achieved following specific appeals for male participants. Our final overall cohort comprised 22 men and 32 women. While not 50/50, this is more balanced than the 75/25 female/male balance of the clinical speech cohort recruited in RADAR-MDD, which was attributed to the greater reported incidence of depression in women [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref60">60</xref>]. We did achieve good attendance once participants enrolled, with only 1 participant of 54 missing 1 session due to illness. This highlights that participants were engaged and willing to complete the speech tasks.</p>
        </sec>
        <sec>
          <title>Recruitment Feasibility in Clinical Cohorts</title>
          <p>The recording of healthy volunteers in this study was a deliberate design choice; a better understanding of healthy speech is needed to understand changes that accompany pathology. Our choice minimizes variability due to pathology and piloted data collection procedures for future larger studies. Adaptations may be needed to accommodate data collection in clinical populations.</p>
          <p>When implementing our protocol, we benefited from the large pool of potential “healthy” volunteers in our institution. Clinical inclusion criteria could shrink the recruitment pool, and staff and students may be more reluctant to volunteer if it requires disclosure of a diagnosed mental health disorder. Therefore, it remains to be seen if a clinical cohort, such as participants with major depressive disorder, could be recruited for the same protocol recording in a controlled environment, given the need for set recording times and days for 3 to 6 sessions.</p>
          <p>In separate research in a clinical cohort, we have observed that the choice of speech elicitation activity is also important for participant and patient engagement in the context of future mobile speech monitoring apps [<xref ref-type="bibr" rid="ref44">44</xref>]. Fixed, repeated tasks increase the risk of disengagement; for example, we received participant feedback in RADAR-MDD that repeating the same reading every 2 weeks for up to 2 years became tedious. Recruitment in the Voiceome Study was high, but data contribution rates were low, and the lack of engagement was not discussed [<xref ref-type="bibr" rid="ref43">43</xref>].</p>
        </sec>
        <sec>
          <title>Metadata Collection</title>
          <p>A range of speaker-specific factors dictate changes in speech; therefore, the collection of personal data is essential in speech-health studies, as such factors may relate to selection, information, or confounding biases. The collection of such information is a balancing act of analytical goals versus (1) ethical and regulatory considerations that dictate any personal information collected should only relate to what is needed for obtaining meaningful results; (2) participant acceptance and recruitment feasibility, as studies collecting more personal and sensitive information, which may also increase the participation time, may be more challenging to recruit; and (3) logistical considerations, depending on the time and resources available to complete data collection.</p>
          <p>We had ethical, participant acceptability, and logistical factors in mind when deciding what information to collect in our protocol (<xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>). Information that we did not collect but would recommend others consider includes (1) caffeine and alcohol intake before recording [<xref ref-type="bibr" rid="ref83">83</xref>,<xref ref-type="bibr" rid="ref84">84</xref>], (2) medications taken [<xref ref-type="bibr" rid="ref85">85</xref>,<xref ref-type="bibr" rid="ref86">86</xref>], (3) menstrual cycle phase at the time of recording and whether female speakers are menopausal [<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref13">13</xref>], and (4) participant mood using a clinically validated tool.</p>
          <p>As this protocol was for a pilot study, we did not consider getting feedback on metadata collection through patient public involvement work. However, this should be a core consideration when using the underlying methodology in future studies.</p>
        </sec>
        <sec>
          <title>Equipment Setup</title>
          <p>Our setup had 2 limitations with implications for speech measurement precision. First, it was possible for participants to move the position of the office chair on which they were seated during recording as it had wheels and was rotatable. This was a trade-off, as with the chair’s height adjustment feature, participants could be easily centered on the microphone setup per our protocol. We mitigated this risk by observing participants during recording and making gentle reminders not to move the chair and, in rare cases, repositioning the participants. However, participant movement could not be completely excluded.</p>
          <p>Second, there was a limit on how close participants could position their mouths to the microphone, depending on their BMI, as the condenser microphone was set back from the table edge in a fixed position for the study to minimize adjustment of the setup and fully surround it by the acoustic foam enclosure. This had the potential to result in deviations from mouth-microphone distance in our protocol. This issue could be mitigated by positioning the microphone closer to the desk edge, combined with an extension of the acoustic foam to surround the participant and microphone more fully.</p>
          <p>In addition, early in the study, we occasionally observed small amounts of audible interference on recordings from mobile phones and, on rare occasions, phone alert tones and incoming calls. We subsequently requested that participants switch their mobile devices off, place them in flight mode, or leave them outside the recording room during sessions. We later began to set our study phones in flight mode, after occasional, new observations of interference in sessions where interference from the participants’ phones could be excluded.</p>
        </sec>
        <sec>
          <title>Feature Extraction</title>
          <p>Our choice and specification of features to report represented a considerable challenge when developing the protocol. To the best of the authors’ knowledge, there is no agreed minimal benchmark feature set in the literature for such a purpose. In addition, the perturbation and quality measures typically reported in the voice disorder literature [<xref ref-type="bibr" rid="ref62">62</xref>,<xref ref-type="bibr" rid="ref63">63</xref>] are limited; they do not adequately capture all the vocal effects associated with neurological and mental health conditions.</p>
          <p>Meanwhile, predefined multivariate feature sets, such as the extended Geneva Minimalistic Acoustic Parameter Set (eGeMAPs) or the Computational Paralinguistics Challenge Set (ComParE), available in the openSMILE toolkit [<xref ref-type="bibr" rid="ref87">87</xref>,<xref ref-type="bibr" rid="ref88">88</xref>] were not designed for health assessments. For example, these feature sets do not contain specific timing and fluency measures, such as pause rate, a widely used feature in the ALS and depression literature [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref8">8</xref>,<xref ref-type="bibr" rid="ref65">65</xref>]. A similar feature set to ours is published in the study by Larsen et al [<xref ref-type="bibr" rid="ref89">89</xref>], but it contains jitter and shimmer measurements, which have limited utility when extracted from connected speech [<xref ref-type="bibr" rid="ref70">70</xref>].</p>
          <p>An additional challenge is that many commonly reported features are not uniformly defined or extracted by different extraction tools. For example, Lenain et al [<xref ref-type="bibr" rid="ref90">90</xref>] compared vocal jitter across 3 toolboxes and only obtained weak correlations between the different implementations.</p>
          <p>We used Praat as it is arguably more widely used in speech pathology and phonetics research. However, a weakness of Praat we observed relates to the number of settings associated with extracting each feature; finding guidance on preferred values for these settings is difficult. We also observed that default values were not ideal in certain circumstances. For example, when testing the pitch feature extraction code, we observed that the default pitch ceiling value of 500 Hz could result in false pitch readings of &#62;300 Hz, well outside of expected ranges for this feature.</p>
          <p>A challenge relating to extracting features over specific vowels is reliance on third-party ASR and forced alignment tools. Our choice of Whisper and the MFA was to allow us to extract normative feature values from a processing pipeline comprising standard, open-source, well-established tools. We used these tools offline to maintain data privacy and security. A limitation of our protocol is that, due to resource constraints, we were limited to spot-checks of alignments. However, in subsequent work using this dataset, we have observed differences in timing features extracted using word boundaries estimated from transcripts generated using different ASRs [<xref ref-type="bibr" rid="ref27">27</xref>]. Further work, including manual verbatim and phonetic transcriptions, is required to explore the effects of different ASR tools on the quality and reliability of transcripts and to assess alignment accuracy of isolated vowels [<xref ref-type="bibr" rid="ref91">91</xref>].</p>
        </sec>
      </sec>
      <sec>
        <title>Analysis Plan</title>
        <sec>
          <title>Overview</title>
          <p>We will use data collected using this protocol to assess within-participant variability in speech features within 1 day and 1 week and between recording devices and elicitation tasks. The analysis will include the features we have already extracted (<xref ref-type="table" rid="table2">Table 2</xref>) as well as suitable linguistic features (see examples in the study by Botelho et al [<xref ref-type="bibr" rid="ref92">92</xref>]) extracted from the picture description tasks.</p>
          <p>Our analyses will be in 3 stages. First, we will use test-retest scatter plots to visualize systematic versus random differences between pairs of recording sessions. Second, we will use linear mixed effect models [<xref ref-type="bibr" rid="ref93">93</xref>,<xref ref-type="bibr" rid="ref94">94</xref>] to estimate the within- and between-person variance. Each feature will be tested in a separate model. The models will include a participant random intercept and 2 dummy variables indicating whether the recording was made in the middle of the collection period (lunchtime for recordings over 1 day; Wednesday for recordings over 1 week) or later (evening or Friday). Third, from these models, we will calculate the ICC, the proportion of variance attributable to between-person differences (0=all variation is within-person and 1=all variation is between-person).</p>
          <p>We will use linear mixed effect models to estimate differences in speech features over the day or week, using separate models for each feature. The models will include a participant random intercept and 2 dummy variables indicating whether the recording was made in the middle of the period (lunchtime for recordings over 1 day; Wednesday for recordings over 1 week) or later (evening or Friday).</p>
          <p>We will additionally conduct a device-type sensitivity analysis, also using linear mixed effects models, to compare recordings from our benchmark condenser microphone with other devices we used, as in the study by Botelho et al [<xref ref-type="bibr" rid="ref25">25</xref>]. This will reveal how within-speaker variability is captured in recordings by different mobile devices, which commonly use preprocessing and whose microphone specifications may vary. This analysis is needed to increase our understanding of the ability of mobile health tools to reliably capture changes in speech in research and clinical practice. We expect to complete this analysis and submit the follow-up paper later in 2025.</p>
        </sec>
        <sec>
          <title>Data Utility</title>
          <p>The core research question we set out to investigate with the data collected with this protocol was within-individual speech variation within 1 day and 1 week, toward longitudinal assessments of health. However, the resulting data have broader utility in speech research and therefore represent value for funding. This is important to consider in study design, given the large resources needed to generate speech corpora.</p>
          <p>We have begun using the data to benchmark different speech technologies (eg, ASR) and quantify associated variability in the feature extraction pipeline [<xref ref-type="bibr" rid="ref27">27</xref>]. We have also demonstrated practice effects in repeated readings [<xref ref-type="bibr" rid="ref95">95</xref>]. Further utility is gained from recording over multiple devices and using different elicitation methods, allowing us to assess variability in speech features according to these key methodological choices. It is vital to characterize such variation in speech over repeated speech samples to identify and develop reliable speech marker pipelines for clinical research and practice. Finally, we are also preparing to make the datasets accessible to other nonprofit researchers, enabling other investigations.</p>
        </sec>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>In the speech-based health assessment literature, core methodological details and speaker characteristics are often underreported or the rationale for choices not explained. Underlying this, there is a need for more considered design of speech data curation. With this in mind, we have described a protocol for collecting nonpathological repeated speech samples. The core methodological aspects of this protocol cover design and reporting decisions that are relevant for researchers collecting longitudinal data for speech and language biomarker research. We encourage other researchers to adopt similar practices and consider the aspects we highlight in their own projects, thereby aiding replicability and, ultimately, the translation of speech and language biomarkers into clinical research and practice.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Checklist of methodological aspects for consideration in protocol design and reporting.</p>
        <media xlink:href="resprot_v14i1e69431_app1.docx" xlink:title="DOCX File , 385 KB"/>
      </supplementary-material>
      <supplementary-material id="app2">
        <label>Multimedia Appendix 2</label>
        <p>Prerecording questionnaire completed by participants at the start of each recording session.</p>
        <media xlink:href="resprot_v14i1e69431_app2.docx" xlink:title="DOCX File , 31 KB"/>
      </supplementary-material>
      <supplementary-material id="app3">
        <label>Multimedia Appendix 3</label>
        <p>The day cohort reported recording times, median (IQR). Day 1 and day 2 were 8 to 11 weeks apart, on the same weekday.</p>
        <media xlink:href="resprot_v14i1e69431_app3.docx" xlink:title="DOCX File , 24 KB"/>
      </supplementary-material>
      <supplementary-material id="app4">
        <label>Multimedia Appendix 4</label>
        <p>Intervals between sessions, median (IQR) in minutes for day 1 and day 2 in the day study.</p>
        <media xlink:href="resprot_v14i1e69431_app4.docx" xlink:title="DOCX File , 25 KB"/>
      </supplementary-material>
      <supplementary-material id="app5">
        <label>Multimedia Appendix 5</label>
        <p>Distribution of recording times in the week study.</p>
        <media xlink:href="resprot_v14i1e69431_app5.docx" xlink:title="DOCX File , 25 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">ALS</term>
          <def>
            <p>amyotrophic lateral sclerosis</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">ASR</term>
          <def>
            <p>automatic speech recognition</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">Cape-V</term>
          <def>
            <p>Consensus Auditory-Perceptual Evaluation of Voice</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">ComParE</term>
          <def>
            <p>Computational Paralinguistics Challenge Set</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">eGeMAPs</term>
          <def>
            <p>extended Geneva Minimalistic Acoustic Parameter Set</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">ICC</term>
          <def>
            <p>intraclass correlation</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">MFA</term>
          <def>
            <p>Montreal Forced Aligner</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">RADAR-MDD</term>
          <def>
            <p>Remote Assessment of Disease and Relapse—Major Depressive Disorder</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>The authors thank the participants for their valuable support and the King’s College London Department of Psychology for the use of their test rooms for recording. This project was funded by a combination of an Engineering and Physical Sciences Research Council and UK Acoustics Network Plus grant (reference: EP/V007866/1) and an IPEM Innovation Award. This research is partly funded by the National Institute for Health and Care Research (NIHR) Maudsley Biomedical Research Centre. The views expressed are those of the authors and not necessarily those of the NIHR or the Department of Health and Social Care. ZR and CL were supported by the King’s Undergraduate Research Fellowship. TP was supported by a Wellcome Trust Summer Internship.</p>
    </ack>
    <notes>
      <sec>
        <title>Disclaimer</title>
        <p>Massachusetts Institute of Technology Lincoln Laboratory disclaimer: approved for public release. Distribution is unlimited. This material is based upon work supported by the Under Secretary of Defense for Research and Engineering under Air Force Contract No. FA8702-15-D-0001. Any opinions, findings, conclusions, or recommendations expressed in this material are those of the authors and do not necessarily reflect the views of the Under Secretary of Defense for Research and Engineering.</p>
      </sec>
    </notes>
    <notes>
      <sec>
        <title>Data Availability</title>
        <p>All code is available upon request from the corresponding author. Datasets will also be made available subject to participant consent and completion of a data use agreement.</p>
      </sec>
    </notes>
    <fn-group>
      <fn fn-type="con">
        <p>NC and J Dineley were responsible for conception, design, acquisition, analysis, data interpretation, and manuscript drafting and editing. LLW was responsible for design, acquisition, analysis, and manuscript drafting and editing. EC was responsible for conception, analysis, and manuscript editing. CL, ZR, and TP were responsible for acquisition and analysis. FM was responsible for conception, design, and manuscript editing. J Downs and RD were responsible for conception and manuscript editing. TFQ was responsible for conception, data interpretation, and manuscript editing.</p>
      </fn>
      <fn fn-type="conflict">
        <p>NC is a consultant to thymia Ltd. RD is a director of CogStack Ltd and Onsentia Ltd. All other authors have no other conflicts to declare.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Eshghi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Yunusova</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Connaghan</surname>
              <given-names>KP</given-names>
            </name>
            <name name-style="western">
              <surname>Perry</surname>
              <given-names>BJ</given-names>
            </name>
            <name name-style="western">
              <surname>Maffei</surname>
              <given-names>MF</given-names>
            </name>
            <name name-style="western">
              <surname>Berry</surname>
              <given-names>JD</given-names>
            </name>
            <name name-style="western">
              <surname>Zinman</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Kalra</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Korngut</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Genge</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Dionne</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Green</surname>
              <given-names>JR</given-names>
            </name>
          </person-group>
          <article-title>Rate of speech decline in individuals with amyotrophic lateral sclerosis</article-title>
          <source>Sci Rep</source>
          <year>2022</year>
          <month>09</month>
          <day>20</day>
          <volume>12</volume>
          <issue>1</issue>
          <fpage>15713</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41598-022-19651-1"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41598-022-19651-1</pub-id>
          <pub-id pub-id-type="medline">36127362</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41598-022-19651-1</pub-id>
          <pub-id pub-id-type="pmcid">PMC9489769</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rusz</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Krack</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Tripoliti</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>From prodromal stages to clinical trials: the promise of digital speech biomarkers in Parkinson's disease</article-title>
          <source>Neurosci Biobehav Rev</source>
          <year>2024</year>
          <month>12</month>
          <volume>167</volume>
          <fpage>105922</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S0149-7634(24)00391-9"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.neubiorev.2024.105922</pub-id>
          <pub-id pub-id-type="medline">39424108</pub-id>
          <pub-id pub-id-type="pii">S0149-7634(24)00391-9</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rios-Urrego</surname>
              <given-names>CD</given-names>
            </name>
            <name name-style="western">
              <surname>Rusz</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Orozco-Arroyave</surname>
              <given-names>JR</given-names>
            </name>
          </person-group>
          <article-title>Automatic speech-based assessment to discriminate Parkinson's disease from essential tremor with a cross-language approach</article-title>
          <source>NPJ Digit Med</source>
          <year>2024</year>
          <month>02</month>
          <day>17</day>
          <volume>7</volume>
          <issue>1</issue>
          <fpage>37</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41746-024-01027-6"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41746-024-01027-6</pub-id>
          <pub-id pub-id-type="medline">38368458</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41746-024-01027-6</pub-id>
          <pub-id pub-id-type="pmcid">PMC10874421</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Moro-Velazquez</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Gomez-Garcia</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Arias-Londoño</surname>
              <given-names>JD</given-names>
            </name>
            <name name-style="western">
              <surname>Dehak</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Godino-Llorente</surname>
              <given-names>JI</given-names>
            </name>
          </person-group>
          <article-title>Advances in Parkinson's disease detection and assessment using voice and speech: a review of the articulatory and phonatory aspects</article-title>
          <source>Biomed Signal Process Control</source>
          <year>2021</year>
          <month>04</month>
          <volume>66</volume>
          <fpage>102418</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1016/j.bspc.2021.102418"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.bspc.2021.102418</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Corcoran</surname>
              <given-names>CM</given-names>
            </name>
            <name name-style="western">
              <surname>Cecchi</surname>
              <given-names>GA</given-names>
            </name>
          </person-group>
          <article-title>Using language processing and speech analysis for the identification of psychosis and other disorders</article-title>
          <source>Biol Psychiatry Cogn Neurosci Neuroimaging</source>
          <year>2020</year>
          <month>08</month>
          <volume>5</volume>
          <issue>8</issue>
          <fpage>770</fpage>
          <lpage>9</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/32771179"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.bpsc.2020.06.004</pub-id>
          <pub-id pub-id-type="medline">32771179</pub-id>
          <pub-id pub-id-type="pii">S2451-9022(20)30154-3</pub-id>
          <pub-id pub-id-type="pmcid">PMC7430500</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Olah</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Spencer</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Cummins</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Diederen</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Automated analysis of speech as a marker of sub-clinical psychotic experiences</article-title>
          <source>Front Psychiatry</source>
          <year>2023</year>
          <month>2</month>
          <day>1</day>
          <volume>14</volume>
          <fpage>1265880</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/38361830"/>
          </comment>
          <pub-id pub-id-type="doi">10.3389/fpsyt.2023.1265880</pub-id>
          <pub-id pub-id-type="medline">38361830</pub-id>
          <pub-id pub-id-type="pmcid">PMC10867252</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cummins</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Dineley</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Conde</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Matcham</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Siddi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Lamers</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Carr</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Lavelle</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Leightley</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>White</surname>
              <given-names>KM</given-names>
            </name>
            <name name-style="western">
              <surname>Oetzmann</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Campbell</surname>
              <given-names>EL</given-names>
            </name>
            <name name-style="western">
              <surname>Simblett</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Bruce</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Haro</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Penninx</surname>
              <given-names>BW</given-names>
            </name>
            <name name-style="western">
              <surname>Ranjan</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Rashid</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Stewart</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Folarin</surname>
              <given-names>AA</given-names>
            </name>
            <name name-style="western">
              <surname>Bailón</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Schuller</surname>
              <given-names>BW</given-names>
            </name>
            <name name-style="western">
              <surname>Wykes</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Vairavan</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Dobson</surname>
              <given-names>RJ</given-names>
            </name>
            <name name-style="western">
              <surname>Narayan</surname>
              <given-names>VA</given-names>
            </name>
            <name name-style="western">
              <surname>Hotopf</surname>
              <given-names>M</given-names>
            </name>
            <collab>RADAR-CNS Consortium</collab>
          </person-group>
          <article-title>Multilingual markers of depression in remotely collected speech samples: a preliminary analysis</article-title>
          <source>J Affect Disord</source>
          <year>2023</year>
          <month>11</month>
          <day>15</day>
          <volume>341</volume>
          <fpage>128</fpage>
          <lpage>36</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S0165-0327(23)01076-5"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jad.2023.08.097</pub-id>
          <pub-id pub-id-type="medline">37598722</pub-id>
          <pub-id pub-id-type="pii">S0165-0327(23)01076-5</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mundt</surname>
              <given-names>JC</given-names>
            </name>
            <name name-style="western">
              <surname>Vogel</surname>
              <given-names>AP</given-names>
            </name>
            <name name-style="western">
              <surname>Feltner</surname>
              <given-names>DE</given-names>
            </name>
            <name name-style="western">
              <surname>Lenderking</surname>
              <given-names>WR</given-names>
            </name>
          </person-group>
          <article-title>Vocal acoustic biomarkers of depression severity and treatment response</article-title>
          <source>Biol Psychiatry</source>
          <year>2012</year>
          <month>10</month>
          <day>01</day>
          <volume>72</volume>
          <issue>7</issue>
          <fpage>580</fpage>
          <lpage>7</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/22541039"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.biopsych.2012.03.015</pub-id>
          <pub-id pub-id-type="medline">22541039</pub-id>
          <pub-id pub-id-type="pii">S0006-3223(12)00263-6</pub-id>
          <pub-id pub-id-type="pmcid">PMC3409931</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cummins</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Scherer</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Krajewski</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Schnieder</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Epps</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Quatieri</surname>
              <given-names>TF</given-names>
            </name>
          </person-group>
          <article-title>A review of depression and suicide risk assessment using speech analysis</article-title>
          <source>Speech Commun</source>
          <year>2015</year>
          <month>07</month>
          <volume>71</volume>
          <fpage>10</fpage>
          <lpage>49</lpage>
          <pub-id pub-id-type="doi">10.1016/j.specom.2015.03.004</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Low</surname>
              <given-names>DM</given-names>
            </name>
            <name name-style="western">
              <surname>Bentley</surname>
              <given-names>KH</given-names>
            </name>
            <name name-style="western">
              <surname>Ghosh</surname>
              <given-names>SS</given-names>
            </name>
          </person-group>
          <article-title>Automated assessment of psychiatric disorders using speech: a systematic review</article-title>
          <source>Laryngoscope Investig Otolaryngol</source>
          <year>2020</year>
           <month>02</month>
          <volume>5</volume>
          <issue>1</issue>
          <fpage>96</fpage>
          <lpage>116</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/32128436"/>
          </comment>
          <pub-id pub-id-type="doi">10.1002/lio2.354</pub-id>
          <pub-id pub-id-type="medline">32128436</pub-id>
          <pub-id pub-id-type="pii">LIO2354</pub-id>
          <pub-id pub-id-type="pmcid">PMC7042657</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hecker</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Steckhan</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Eyben</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Schuller</surname>
              <given-names>BW</given-names>
            </name>
            <name name-style="western">
              <surname>Arnrich</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Voice analysis for neurological disorder recognition-a systematic review and perspective on emerging trends</article-title>
          <source>Front Digit Health</source>
          <year>2022</year>
          <month>7</month>
          <day>7</day>
          <volume>4</volume>
          <fpage>842301</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/35899034"/>
          </comment>
          <pub-id pub-id-type="doi">10.3389/fdgth.2022.842301</pub-id>
          <pub-id pub-id-type="medline">35899034</pub-id>
          <pub-id pub-id-type="pmcid">PMC9309252</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bryant</surname>
              <given-names>GA</given-names>
            </name>
            <name name-style="western">
              <surname>Haselton</surname>
              <given-names>MG</given-names>
            </name>
          </person-group>
          <article-title>Vocal cues of ovulation in human females</article-title>
          <source>Biol Lett</source>
          <year>2009</year>
          <month>02</month>
          <day>23</day>
          <volume>5</volume>
          <issue>1</issue>
          <fpage>12</fpage>
          <lpage>5</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/18845518"/>
          </comment>
          <pub-id pub-id-type="doi">10.1098/rsbl.2008.0507</pub-id>
          <pub-id pub-id-type="medline">18845518</pub-id>
          <pub-id pub-id-type="pii">0W42954J6731840N</pub-id>
          <pub-id pub-id-type="pmcid">PMC2657750</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fischer</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Semple</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Fickenscher</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Jürgens</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Kruse</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Heistermann</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Amir</surname>
              <given-names>O</given-names>
            </name>
          </person-group>
          <article-title>Do women's voices provide cues of the likelihood of ovulation? The importance of sampling regime</article-title>
          <source>PLoS One</source>
          <year>2011</year>
          <month>9</month>
          <day>21</day>
          <volume>6</volume>
          <issue>9</issue>
          <fpage>e24490</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pone.0024490"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0024490</pub-id>
          <pub-id pub-id-type="medline">21957453</pub-id>
          <pub-id pub-id-type="pii">PONE-D-11-12884</pub-id>
          <pub-id pub-id-type="pmcid">PMC3177841</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Vogel</surname>
              <given-names>AP</given-names>
            </name>
            <name name-style="western">
              <surname>Fletcher</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Maruff</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Acoustic analysis of the effects of sustained wakefulness on speech</article-title>
          <source>J Acoust Soc Am</source>
          <year>2010</year>
          <month>12</month>
          <volume>128</volume>
          <issue>6</issue>
          <fpage>3747</fpage>
          <lpage>56</lpage>
          <pub-id pub-id-type="doi">10.1121/1.3506349</pub-id>
          <pub-id pub-id-type="medline">21218906</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ilomäki</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Leppänen</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Kleemola</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Tyrmi</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Laukkanen</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Vilkman</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Relationships between self-evaluations of voice and working conditions, background factors, and phoniatric findings in female teachers</article-title>
          <source>Logoped Phoniatr Vocol</source>
          <year>2009</year>
          <month>07</month>
          <day>11</day>
          <volume>34</volume>
          <issue>1</issue>
          <fpage>20</fpage>
          <lpage>31</lpage>
          <pub-id pub-id-type="doi">10.1080/14015430802042013</pub-id>
          <pub-id pub-id-type="medline">19283550</pub-id>
          <pub-id pub-id-type="pii">909481486</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Laukkanen</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Ilomäki</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Leppänen</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Vilkman</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Acoustic measures and self-reports of vocal fatigue by female teachers</article-title>
          <source>J Voice</source>
          <year>2008</year>
          <month>05</month>
          <volume>22</volume>
          <issue>3</issue>
          <fpage>283</fpage>
          <lpage>9</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1016/j.jvoice.2006.10.001"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jvoice.2006.10.001</pub-id>
          <pub-id pub-id-type="medline">17134877</pub-id>
          <pub-id pub-id-type="pii">S0892-1997(06)00133-0</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Laukkanen</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Kankare</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Vocal loading-related changes in male teachers' voices investigated before and after a working day</article-title>
          <source>Folia Phoniatr Logop</source>
          <year>2006</year>
          <month>7</month>
          <day>10</day>
          <volume>58</volume>
          <issue>4</issue>
          <fpage>229</fpage>
          <lpage>39</lpage>
          <pub-id pub-id-type="doi">10.1159/000093180</pub-id>
          <pub-id pub-id-type="medline">16825776</pub-id>
          <pub-id pub-id-type="pii">93180</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Davletcharova</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Sugathan</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Abraham</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>James</surname>
              <given-names>AP</given-names>
            </name>
          </person-group>
          <article-title>Detection and analysis of emotion from speech signals</article-title>
          <source>Procedia Comput Sci</source>
          <year>2015</year>
          <volume>58</volume>
          <fpage>91</fpage>
          <lpage>6</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1016/j.procs.2015.08.032"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.procs.2015.08.032</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Alves</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Krüger</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Pillay</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>van Lierde</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>van der Linde</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>The effect of hydration on voice quality in adults: a systematic review</article-title>
          <source>J Voice</source>
          <year>2019</year>
          <month>01</month>
          <volume>33</volume>
          <issue>1</issue>
          <fpage>125.e13</fpage>
          <lpage>28</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1016/j.jvoice.2017.10.001"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jvoice.2017.10.001</pub-id>
          <pub-id pub-id-type="medline">29122414</pub-id>
          <pub-id pub-id-type="pii">S0892-1997(17)30389-2</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lã</surname>
              <given-names>FM</given-names>
            </name>
            <name name-style="western">
              <surname>Ardura</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>What voice-related metrics change with menopause? A systematic review and meta-analysis study</article-title>
          <source>J Voice</source>
          <year>2022</year>
          <month>05</month>
          <volume>36</volume>
          <issue>3</issue>
          <fpage>438.e1</fpage>
          <lpage>17</lpage>
          <pub-id pub-id-type="doi">10.1016/j.jvoice.2020.06.012</pub-id>
          <pub-id pub-id-type="medline">32660847</pub-id>
          <pub-id pub-id-type="pii">S0892-1997(20)30223-X</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Oliveira Santos</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Godoy</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Silverio</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Brasolotto</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Vocal changes of men and women from different age decades: an analysis from 30 years of age</article-title>
          <source>J Voice</source>
          <year>2023</year>
          <month>11</month>
          <volume>37</volume>
          <issue>6</issue>
          <fpage>840</fpage>
          <lpage>50</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1016/j.jvoice.2021.06.003"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jvoice.2021.06.003</pub-id>
          <pub-id pub-id-type="medline">34284927</pub-id>
          <pub-id pub-id-type="pii">S0892-1997(21)00185-5</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Stathopoulos</surname>
              <given-names>ET</given-names>
            </name>
            <name name-style="western">
              <surname>Huber</surname>
              <given-names>JE</given-names>
            </name>
            <name name-style="western">
              <surname>Sussman</surname>
              <given-names>JE</given-names>
            </name>
          </person-group>
          <article-title>Changes in acoustic characteristics of the voice across the life span: measures from individuals 4-93 years of age</article-title>
          <source>J Speech Lang Hear Res</source>
          <year>2011</year>
          <month>08</month>
          <volume>54</volume>
          <issue>4</issue>
          <fpage>1011</fpage>
          <lpage>21</lpage>
          <pub-id pub-id-type="doi">10.1044/1092-4388(2010/10-0036)</pub-id>
          <pub-id pub-id-type="medline">21173391</pub-id>
          <pub-id pub-id-type="pii">1092-4388_2010_10-0036</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rojas</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kefalianos</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Vogel</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>How does our voice change as we age? A systematic review and meta-analysis of acoustic and perceptual voice data from healthy adults over 50 years of age</article-title>
          <source>J Speech Lang Hear Res</source>
          <year>2020</year>
          <month>02</month>
          <day>26</day>
          <volume>63</volume>
          <issue>2</issue>
          <fpage>533</fpage>
          <lpage>51</lpage>
          <pub-id pub-id-type="doi">10.1044/2019_jslhr-19-00099</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Awan</surname>
              <given-names>SN</given-names>
            </name>
            <name name-style="western">
              <surname>Bahr</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Watts</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Boyer</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Budinsky</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Bensoussan</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Validity of acoustic measures obtained using various recording methods including smartphones with and without headset microphones</article-title>
          <source>J Speech Lang Hear Res</source>
          <year>2024</year>
          <month>06</month>
          <day>06</day>
          <volume>67</volume>
          <issue>6</issue>
          <fpage>1712</fpage>
          <lpage>30</lpage>
          <pub-id pub-id-type="doi">10.1044/2024_jslhr-23-00759</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Botelho</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Schultz</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Abad</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Trancoso</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>Challenges of using longitudinal and cross-domain corpora on studies of pathological speech</article-title>
          <source>Proc Interspeech</source>
          <year>2022</year>
          <fpage>1921</fpage>
          <lpage>5</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.isca-archive.org/interspeech_2022/botelho22_interspeech.html"/>
          </comment>
          <pub-id pub-id-type="doi">10.21437/interspeech.2022-10995</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dineley</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Carr</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Matcham</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Downs</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Dobson</surname>
              <given-names>RJ</given-names>
            </name>
            <name name-style="western">
              <surname>Quatieri</surname>
              <given-names>TF</given-names>
            </name>
            <name name-style="western">
              <surname>Cummins</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>Towards robust paralinguistic assessment for real-world mobile health (mHealth) monitoring: an initial study of reverberation effects on speech</article-title>
          <source>Proc Interspeech</source>
          <year>2023</year>
          <volume>3</volume>
          <fpage>2373</fpage>
          <lpage>7</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.isca-archive.org/interspeech_2023/dineley23_interspeech.html"/>
          </comment>
          <pub-id pub-id-type="doi">10.21437/interspeech.2023-947</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dineley</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Carr</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>White</surname>
              <given-names>LL</given-names>
            </name>
            <name name-style="western">
              <surname>Lucas</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Rahman</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Pan</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Matcham</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Downs</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Variability of speech timing features across repeated recordings: a comparison of open-source extraction techniques</article-title>
          <source>Proc Interspeech</source>
          <year>2024</year>
          <volume>5</volume>
          <fpage>1</fpage>
          <lpage>5</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.isca-archive.org/interspeech_2024/dineley24_interspeech.pdf"/>
          </comment>
          <pub-id pub-id-type="doi">10.21437/interspeech.2024-1074</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Oreskovic</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Kaufman</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Fossat</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Impact of audio data compression on feature extraction for vocal biomarker detection: validation study</article-title>
          <source>JMIR Biomed Eng</source>
          <year>2024</year>
          <month>04</month>
          <day>15</day>
          <volume>9</volume>
          <fpage>e56246</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://biomedeng.jmir.org/2024/1/e56246/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/56246</pub-id>
          <pub-id pub-id-type="medline">38875677</pub-id>
          <pub-id pub-id-type="pii">v9i1e56246</pub-id>
          <pub-id pub-id-type="pmcid">PMC11058552</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Botelho</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Abad</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Schultz</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Trancoso</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>Towards reference speech characterization for health applications</article-title>
          <source>Proc Interspeech</source>
          <year>2023</year>
          <volume>3</volume>
          <fpage>2363</fpage>
          <lpage>7</lpage>
          <pub-id pub-id-type="doi">10.21437/interspeech.2023-1435</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lammert</surname>
              <given-names>AC</given-names>
            </name>
            <name name-style="western">
              <surname>Melot</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Sturim</surname>
              <given-names>DE</given-names>
            </name>
            <name name-style="western">
              <surname>Hannon</surname>
              <given-names>DJ</given-names>
            </name>
            <name name-style="western">
              <surname>DeLaura</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Williamson</surname>
              <given-names>JR</given-names>
            </name>
            <name name-style="western">
              <surname>Ciccarelli</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Quatieri</surname>
              <given-names>TF</given-names>
            </name>
          </person-group>
          <article-title>Analysis of phonetic balance in standard English passages</article-title>
          <source>J Speech Lang Hear Res</source>
          <year>2020</year>
          <month>04</month>
          <day>27</day>
          <volume>63</volume>
          <issue>4</issue>
          <fpage>917</fpage>
          <lpage>30</lpage>
          <pub-id pub-id-type="doi">10.1044/2020_jslhr-19-00001</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Powell</surname>
              <given-names>TW</given-names>
            </name>
          </person-group>
          <article-title>A comparison of English reading passages for elicitation of speech samples from clinical populations</article-title>
          <source>Clin Linguist Phon</source>
          <year>2006</year>
          <month>07</month>
          <day>09</day>
          <volume>20</volume>
          <issue>2-3</issue>
          <fpage>91</fpage>
          <lpage>7</lpage>
          <pub-id pub-id-type="doi">10.1080/02699200400026488</pub-id>
          <pub-id pub-id-type="medline">16428224</pub-id>
          <pub-id pub-id-type="pii">N554331P565G7202</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Giles</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Patterson</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Hodges</surname>
              <given-names>JR</given-names>
            </name>
          </person-group>
          <article-title>Performance on the Boston Cookie theft picture description task in patients with early dementia of the Alzheimer's type: missing information</article-title>
          <source>Aphasiology</source>
          <year>1996</year>
          <month>05</month>
          <volume>10</volume>
          <issue>4</issue>
          <fpage>395</fpage>
          <lpage>408</lpage>
          <pub-id pub-id-type="doi">10.1080/02687039608248419</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nicholas</surname>
              <given-names>LE</given-names>
            </name>
            <name name-style="western">
              <surname>Brookshire</surname>
              <given-names>RH</given-names>
            </name>
          </person-group>
          <article-title>A system for quantifying the informativeness and efficiency of the connected speech of adults with aphasia</article-title>
          <source>J Speech Hear Res</source>
          <year>1993</year>
          <month>04</month>
          <volume>36</volume>
          <issue>2</issue>
          <fpage>338</fpage>
          <lpage>50</lpage>
          <pub-id pub-id-type="doi">10.1044/jshr.3602.338</pub-id>
          <pub-id pub-id-type="medline">8487525</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Brockmann-Bauser</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>de Paula Soares</surname>
              <given-names>MF</given-names>
            </name>
          </person-group>
          <article-title>Do we get what we need from clinical acoustic voice measurements?</article-title>
          <source>Appl Sci</source>
          <year>2023</year>
          <month>01</month>
          <day>10</day>
          <volume>13</volume>
          <issue>2</issue>
          <fpage>941</fpage>
          <pub-id pub-id-type="doi">10.3390/app13020941</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ng</surname>
              <given-names>SI</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Siegert</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Cummins</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Benway</surname>
              <given-names>NR</given-names>
            </name>
            <name name-style="western">
              <surname>Liss</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Berisha</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>A tutorial on clinical speech AI development: from data collection to model validation</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online October 29, 2024</comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2410.21640v1"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Beglinger</surname>
              <given-names>LJ</given-names>
            </name>
            <name name-style="western">
              <surname>Gaydos</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Tangphao-Daniels</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Duff</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Kareken</surname>
              <given-names>DA</given-names>
            </name>
            <name name-style="western">
              <surname>Crawford</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Fastenau</surname>
              <given-names>PS</given-names>
            </name>
            <name name-style="western">
              <surname>Siemers</surname>
              <given-names>ER</given-names>
            </name>
          </person-group>
          <article-title>Practice effects and the use of alternate forms in serial neuropsychological testing</article-title>
          <source>Arch Clin Neuropsychol</source>
          <year>2005</year>
          <month>06</month>
          <volume>20</volume>
          <issue>4</issue>
          <fpage>517</fpage>
          <lpage>29</lpage>
          <pub-id pub-id-type="doi">10.1016/j.acn.2004.12.003</pub-id>
          <pub-id pub-id-type="medline">15896564</pub-id>
          <pub-id pub-id-type="pii">S0887-6177(05)00003-X</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Collie</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Maruff</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Darby</surname>
              <given-names>DG</given-names>
            </name>
            <name name-style="western">
              <surname>McStephen</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>The effects of practice on the cognitive test performance of neurologically normal individuals assessed at brief test-retest intervals</article-title>
          <source>J Int Neuropsychol Soc</source>
          <year>2003</year>
          <month>03</month>
          <day>25</day>
          <volume>9</volume>
          <issue>3</issue>
          <fpage>419</fpage>
          <lpage>28</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1017/S1355617703930074"/>
          </comment>
          <pub-id pub-id-type="doi">10.1017/S1355617703930074</pub-id>
          <pub-id pub-id-type="medline">12666766</pub-id>
          <pub-id pub-id-type="pii">S1355617703930074</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Goberman</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Hughes</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Haydock</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Acoustic characteristics of public speaking: anxiety and practice effects</article-title>
          <source>Speech Commun</source>
          <year>2011</year>
          <month>07</month>
          <volume>53</volume>
          <issue>6</issue>
          <fpage>867</fpage>
          <lpage>76</lpage>
          <pub-id pub-id-type="doi">10.1016/j.specom.2011.02.005</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Strand</surname>
              <given-names>JF</given-names>
            </name>
            <name name-style="western">
              <surname>Brown</surname>
              <given-names>VA</given-names>
            </name>
          </person-group>
          <article-title>Spread the word: enhancing replicability of speech research through stimulus sharing</article-title>
          <source>J Speech Lang Hear Res</source>
          <year>2023</year>
          <month>06</month>
          <day>20</day>
          <volume>66</volume>
          <issue>6</issue>
          <fpage>1967</fpage>
          <lpage>76</lpage>
          <pub-id pub-id-type="doi">10.1044/2022_jslhr-22-00267</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Evangelista</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Kale</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>McCutcheon</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Rameau</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Gelbard</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Powell</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Johns</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Law</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Song</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Naunheim</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Watts</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Bryson</surname>
              <given-names>PC</given-names>
            </name>
            <name name-style="western">
              <surname>Crowson</surname>
              <given-names>MG</given-names>
            </name>
            <name name-style="western">
              <surname>Pinto</surname>
              <given-names>J</given-names>
            </name>
            <collab>Bridge2AI-Voice</collab>
            <name name-style="western">
              <surname>Bensoussan</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Current practices in voice data collection and limitations to voice AI research: a national survey</article-title>
          <source>Laryngoscope</source>
          <year>2024</year>
          <month>03</month>
          <volume>134</volume>
          <issue>3</issue>
          <fpage>1333</fpage>
          <lpage>9</lpage>
          <pub-id pub-id-type="doi">10.1002/lary.31052</pub-id>
          <pub-id pub-id-type="medline">38087983</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kempster</surname>
              <given-names>GB</given-names>
            </name>
            <name name-style="western">
              <surname>Gerratt</surname>
              <given-names>BR</given-names>
            </name>
            <name name-style="western">
              <surname>Verdolini Abbott</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Barkmeier-Kraemer</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Hillman</surname>
              <given-names>RE</given-names>
            </name>
          </person-group>
          <article-title>Consensus auditory-perceptual evaluation of voice: development of a standardized clinical protocol</article-title>
          <source>Am J Speech Lang Pathol</source>
          <year>2009</year>
          <month>05</month>
          <volume>18</volume>
          <issue>2</issue>
          <fpage>124</fpage>
          <lpage>32</lpage>
          <pub-id pub-id-type="doi">10.1044/1058-0360(2008/08-0017)</pub-id>
          <pub-id pub-id-type="medline">18930908</pub-id>
          <pub-id pub-id-type="pii">1058-0360_2008_08-0017</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Patel</surname>
              <given-names>RR</given-names>
            </name>
            <name name-style="western">
              <surname>Awan</surname>
              <given-names>SN</given-names>
            </name>
            <name name-style="western">
              <surname>Barkmeier-Kraemer</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Courey</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Deliyski</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Eadie</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Paul</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Švec</surname>
              <given-names>JG</given-names>
            </name>
            <name name-style="western">
              <surname>Hillman</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Recommended protocols for instrumental assessment of voice: American speech-language-hearing association expert panel to develop a protocol for instrumental assessment of vocal function</article-title>
          <source>Am J Speech Lang Pathol</source>
          <year>2018</year>
          <month>08</month>
          <day>06</day>
          <volume>27</volume>
          <issue>3</issue>
          <fpage>887</fpage>
          <lpage>905</lpage>
          <pub-id pub-id-type="doi">10.1044/2018_AJSLP-17-0009</pub-id>
          <pub-id pub-id-type="medline">29955816</pub-id>
          <pub-id pub-id-type="pii">2686671</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Schwoebel</surname>
              <given-names>JW</given-names>
            </name>
            <name name-style="western">
              <surname>Schwartz</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Warrenburg</surname>
              <given-names>LA</given-names>
            </name>
            <name name-style="western">
              <surname>Brown</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Awasthi</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>New</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Butler</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Moss</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>A longitudinal normative dataset and protocol for speech and language biomarker research</article-title>
          <source>medRxiv</source>
          <comment>Preprint posted online August 24, 2021</comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.medrxiv.org/content/10.1101/2021.08.16.21262125v1"/>
          </comment>
          <pub-id pub-id-type="doi">10.1101/2021.08.16.21262125</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref44">
        <label>44</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dineley</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Lavelle</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Leightley</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Matcham</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Siddi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Peñarrubia-María</surname>
              <given-names>MT</given-names>
            </name>
            <name name-style="western">
              <surname>White</surname>
              <given-names>KM</given-names>
            </name>
            <name name-style="western">
              <surname>Ivan</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Oetzmann</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Simblett</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Dawe-Lane</surname>
              <given-names>E</given-names>
            </name>
            <collab>The RADAR-CNS Consortium</collab>
          </person-group>
          <article-title>Remote smartphone-based speech collection: acceptance and barriers in individuals with major depressive disorder</article-title>
          <source>Proc Interspeech</source>
          <year>2021</year>
          <month>08</month>
          <fpage>631</fpage>
          <lpage>635</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.isca-archive.org/interspeech_2021/dineley21_interspeech.html"/>
          </comment>
          <pub-id pub-id-type="doi">10.21437/interspeech.2021-1240</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref45">
        <label>45</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chambers</surname>
              <given-names>CD</given-names>
            </name>
            <name name-style="western">
              <surname>Tzavella</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>The past, present and future of registered reports</article-title>
          <source>Nat Hum Behav</source>
          <year>2022</year>
          <month>01</month>
          <day>15</day>
          <volume>6</volume>
          <issue>1</issue>
          <fpage>29</fpage>
          <lpage>42</lpage>
          <pub-id pub-id-type="doi">10.1038/s41562-021-01193-7</pub-id>
          <pub-id pub-id-type="medline">34782730</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41562-021-01193-7</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref46">
        <label>46</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>SY</given-names>
            </name>
          </person-group>
          <article-title>Why do journals publish research protocols?</article-title>
          <source>Sci Ed</source>
          <year>2022</year>
          <month>08</month>
          <day>19</day>
          <volume>9</volume>
          <issue>2</issue>
          <fpage>146</fpage>
          <lpage>8</lpage>
          <pub-id pub-id-type="doi">10.6087/kcse.280</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref47">
        <label>47</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Robin</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Harrison</surname>
              <given-names>JE</given-names>
            </name>
            <name name-style="western">
              <surname>Kaufman</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Rudzicz</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Simpson</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Yancheva</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Evaluation of speech-based digital biomarkers: review and recommendations</article-title>
          <source>Digit Biomark</source>
          <year>2020</year>
          <month>10</month>
          <day>19</day>
          <volume>4</volume>
          <issue>3</issue>
          <fpage>99</fpage>
          <lpage>108</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1159/000510820"/>
          </comment>
          <pub-id pub-id-type="doi">10.1159/000510820</pub-id>
          <pub-id pub-id-type="medline">33251474</pub-id>
          <pub-id pub-id-type="pii">dib-0004-0099</pub-id>
          <pub-id pub-id-type="pmcid">PMC7670321</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref48">
        <label>48</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Garrett</surname>
              <given-names>KL</given-names>
            </name>
            <name name-style="western">
              <surname>Healey</surname>
              <given-names>EC</given-names>
            </name>
          </person-group>
          <article-title>An acoustic analysis of fluctuations in the voices of normal adult speakers across three times of day</article-title>
          <source>J Acoust Soc Am</source>
          <year>1987</year>
          <month>07</month>
          <volume>82</volume>
          <issue>1</issue>
          <fpage>58</fpage>
          <lpage>62</lpage>
          <pub-id pub-id-type="doi">10.1121/1.395437</pub-id>
          <pub-id pub-id-type="medline">3624641</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref49">
        <label>49</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Leong</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Hawkshaw</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Dentchev</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Gupta</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Lurie</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Sataloff</surname>
              <given-names>RT</given-names>
            </name>
          </person-group>
          <article-title>Reliability of objective voice measures of normal speaking voices</article-title>
          <source>J Voice</source>
          <year>2013</year>
          <month>03</month>
          <volume>27</volume>
          <issue>2</issue>
          <fpage>170</fpage>
          <lpage>6</lpage>
          <pub-id pub-id-type="doi">10.1016/j.jvoice.2012.07.005</pub-id>
          <pub-id pub-id-type="medline">23280378</pub-id>
          <pub-id pub-id-type="pii">S0892-1997(12)00106-3</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref50">
        <label>50</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pierce</surname>
              <given-names>JL</given-names>
            </name>
            <name name-style="western">
              <surname>Tanner</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Merrill</surname>
              <given-names>RM</given-names>
            </name>
            <name name-style="western">
              <surname>Shnowske</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Roy</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>Acoustic variability in the healthy female voice within and across days: how much and why?</article-title>
          <source>J Speech Lang Hear Res</source>
          <year>2021</year>
          <month>08</month>
          <day>09</day>
          <volume>64</volume>
          <issue>8</issue>
          <fpage>3015</fpage>
          <lpage>31</lpage>
          <pub-id pub-id-type="doi">10.1044/2021_jslhr-21-00018</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref51">
        <label>51</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Feng</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Tang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Qian</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Test-retest reliability of acoustic and linguistic measures of speech tasks</article-title>
          <source>Comput Speech Lang</source>
          <year>2024</year>
          <month>01</month>
          <volume>83</volume>
          <fpage>101547</fpage>
          <pub-id pub-id-type="doi">10.1016/j.csl.2023.101547</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref52">
        <label>52</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Barnett</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Green</surname>
              <given-names>JR</given-names>
            </name>
            <name name-style="western">
              <surname>Marzouqah</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Stipancic</surname>
              <given-names>KL</given-names>
            </name>
            <name name-style="western">
              <surname>Berry</surname>
              <given-names>JD</given-names>
            </name>
            <name name-style="western">
              <surname>Korngut</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Genge</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Shoesmith</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Briemberg</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Abrahao</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Kalra</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Zinman</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Yunusova</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Reliability &#38; validity of speech and pause measures during passage reading in ALS</article-title>
          <source>Amyotroph Lateral Scler Frontotemporal Degener</source>
          <year>2020</year>
          <month>02</month>
          <day>06</day>
          <volume>21</volume>
          <issue>1-2</issue>
          <fpage>42</fpage>
          <lpage>50</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/32138555"/>
          </comment>
          <pub-id pub-id-type="doi">10.1080/21678421.2019.1697888</pub-id>
          <pub-id pub-id-type="medline">32138555</pub-id>
          <pub-id pub-id-type="pmcid">PMC7080316</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref53">
        <label>53</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Stegmann</surname>
              <given-names>GM</given-names>
            </name>
            <name name-style="western">
              <surname>Hahn</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Liss</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Shefner</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Rutkove</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kawabata</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Bhandari</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Shelton</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Duncan</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Berisha</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>Repeatability of commonly used speech and language features for clinical applications</article-title>
          <source>Digit Biomark</source>
          <year>2020</year>
          <month>12</month>
          <day>2</day>
          <volume>4</volume>
          <issue>3</issue>
          <fpage>109</fpage>
          <lpage>22</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1159/000511671"/>
          </comment>
          <pub-id pub-id-type="doi">10.1159/000511671</pub-id>
          <pub-id pub-id-type="medline">33442573</pub-id>
          <pub-id pub-id-type="pii">dib-0004-0109</pub-id>
          <pub-id pub-id-type="pmcid">PMC7772887</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref54">
        <label>54</label>
        <nlm-citation citation-type="web">
          <article-title>Common European framework of reference for languages: learning, teaching, assessment</article-title>
          <source>Council of Europe Council for Cultural Co-operation Education Committee Modern Languages Division</source>
          <year>2001</year>
          <access-date>2025-05-29</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://rm.coe.int/common-european-framework-of-reference-for-languages-learning-teaching/16809ea0d4">https://rm.coe.int/common-european-framework-of-reference-for-languages-learning-teaching/16809ea0d4</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref55">
        <label>55</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Desmet</surname>
              <given-names>PM</given-names>
            </name>
            <name name-style="western">
              <surname>Vastenburg</surname>
              <given-names>MH</given-names>
            </name>
            <name name-style="western">
              <surname>Romero</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>Mood measurement with Pick-A-Mood: review of current methods and design of a pictorial self-report scale</article-title>
          <source>J Design Res</source>
          <year>2016</year>
          <volume>14</volume>
          <issue>3</issue>
          <fpage>241</fpage>
          <lpage>78</lpage>
          <pub-id pub-id-type="doi">10.1504/jdr.2016.079751</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref56">
        <label>56</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <collab>International Phonetic Association</collab>
          </person-group>
          <source>Handbook of the International Phonetic Association: A Guide to the Use of the International Phonetic Alphabet</source>
          <year>1999</year>
          <publisher-loc>Cambridge, UK</publisher-loc>
          <publisher-name>Cambridge University Press</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref57">
        <label>57</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fairbanks</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <source>Voice and Articulation Drillbook. 2nd edition</source>
          <year>1960</year>
          <publisher-loc>New York, NY</publisher-loc>
          <publisher-name>Harper &#38; Row</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref58">
        <label>58</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fairbanks</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <source>Voice and Articulation Drillbook</source>
          <year>1940</year>
          <publisher-loc>New York, NY</publisher-loc>
          <publisher-name>Harper &#38; Brothers</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref59">
        <label>59</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Honorof</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>McCullough</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Somerville</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Comma gets a cure: a diagnostic passage for accent study</article-title>
          <source>IDEA</source>
          <access-date>2025-06-13</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.dialectsarchive.com/comma-gets-a-cure">https://www.dialectsarchive.com/comma-gets-a-cure</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref60">
        <label>60</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Matcham</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Leightley</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Siddi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Lamers</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>White</surname>
              <given-names>KM</given-names>
            </name>
            <name name-style="western">
              <surname>Annas</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>de Girolamo</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Difrancesco</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Haro</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Horsfall</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Ivan</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Lavelle</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Lombardini</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Mohr</surname>
              <given-names>DC</given-names>
            </name>
            <name name-style="western">
              <surname>Narayan</surname>
              <given-names>VA</given-names>
            </name>
            <name name-style="western">
              <surname>Oetzmann</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Penninx</surname>
              <given-names>BW</given-names>
            </name>
            <name name-style="western">
              <surname>Bruce</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Nica</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Simblett</surname>
              <given-names>SK</given-names>
            </name>
            <name name-style="western">
              <surname>Wykes</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Brasen</surname>
              <given-names>JC</given-names>
            </name>
            <name name-style="western">
              <surname>Myin-Germeys</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Rintala</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Conde</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Dobson</surname>
              <given-names>RJ</given-names>
            </name>
            <name name-style="western">
              <surname>Folarin</surname>
              <given-names>AA</given-names>
            </name>
            <name name-style="western">
              <surname>Stewart</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Ranjan</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Rashid</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Cummins</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Manyakov</surname>
              <given-names>NV</given-names>
            </name>
            <name name-style="western">
              <surname>Vairavan</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Hotopf</surname>
              <given-names>M</given-names>
            </name>
            <collab>RADAR-CNS consortium</collab>
          </person-group>
          <article-title>Remote Assessment of Disease and Relapse in Major Depressive Disorder (RADAR-MDD): recruitment, retention, and data availability in a longitudinal remote measurement study</article-title>
          <source>BMC Psychiatry</source>
          <year>2022</year>
          <month>02</month>
          <day>21</day>
          <volume>22</volume>
          <issue>1</issue>
          <fpage>136</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcpsychiatry.biomedcentral.com/articles/10.1186/s12888-022-03753-1"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12888-022-03753-1</pub-id>
          <pub-id pub-id-type="medline">35189842</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12888-022-03753-1</pub-id>
          <pub-id pub-id-type="pmcid">PMC8860359</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref61">
        <label>61</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Haider</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>de la Fuente</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Luz</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>An assessment of paralinguistic acoustic features for detection of Alzheimer’s dementia in spontaneous speech</article-title>
          <source>IEEE J Sel Top Signal Process</source>
          <year>2020</year>
          <month>2</month>
          <volume>14</volume>
          <issue>2</issue>
          <fpage>272</fpage>
          <lpage>81</lpage>
          <pub-id pub-id-type="doi">10.1109/jstsp.2019.2955022</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref62">
        <label>62</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Maryn</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Roy</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>De Bodt</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Van Cauwenberge</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Corthals</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Acoustic measurement of overall voice quality: a meta-analysis</article-title>
          <source>J Acoust Soc Am</source>
          <year>2009</year>
          <month>11</month>
          <volume>126</volume>
          <issue>5</issue>
          <fpage>2619</fpage>
          <lpage>34</lpage>
          <pub-id pub-id-type="doi">10.1121/1.3224706</pub-id>
          <pub-id pub-id-type="medline">19894840</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref63">
        <label>63</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Maryn</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Corthals</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Van Cauwenberge</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Roy</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>De Bodt</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Toward improved ecological validity in the acoustic measurement of overall voice quality: combining continuous speech and sustained vowels</article-title>
          <source>J Voice</source>
          <year>2010</year>
          <month>09</month>
          <volume>24</volume>
          <issue>5</issue>
          <fpage>540</fpage>
          <lpage>55</lpage>
          <pub-id pub-id-type="doi">10.1016/j.jvoice.2008.12.014</pub-id>
          <pub-id pub-id-type="medline">19883993</pub-id>
          <pub-id pub-id-type="pii">S0892-1997(09)00003-4</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref64">
        <label>64</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Vogel</surname>
              <given-names>AP</given-names>
            </name>
            <name name-style="western">
              <surname>Maruff</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Snyder</surname>
              <given-names>PJ</given-names>
            </name>
            <name name-style="western">
              <surname>Mundt</surname>
              <given-names>JC</given-names>
            </name>
          </person-group>
          <article-title>Standardization of pitch-range settings in voice acoustic analysis</article-title>
          <source>Behav Res Methods</source>
          <year>2009</year>
          <month>5</month>
          <volume>41</volume>
          <issue>2</issue>
          <fpage>318</fpage>
          <lpage>24</lpage>
          <pub-id pub-id-type="doi">10.3758/brm.41.2.318</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref65">
        <label>65</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Green</surname>
              <given-names>JR</given-names>
            </name>
            <name name-style="western">
              <surname>Yunusova</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Kuruvilla</surname>
              <given-names>MS</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Pattee</surname>
              <given-names>GL</given-names>
            </name>
            <name name-style="western">
              <surname>Synhorst</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Zinman</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Berry</surname>
              <given-names>JD</given-names>
            </name>
          </person-group>
          <article-title>Bulbar and speech motor assessment in ALS: challenges and future directions</article-title>
          <source>Amyotroph Lateral Scler Frontotemporal Degener</source>
          <year>2013</year>
          <month>12</month>
          <day>30</day>
          <volume>14</volume>
          <issue>7-8</issue>
          <fpage>494</fpage>
          <lpage>500</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.tandfonline.com/doi/abs/10.3109/21678421.2013.817585?url_ver=Z39.88-2003&#38;rfr_id=ori:rid:crossref.org&#38;rfr_dat=cr_pub%20%200pubmed"/>
          </comment>
          <pub-id pub-id-type="doi">10.3109/21678421.2013.817585</pub-id>
          <pub-id pub-id-type="medline">23898888</pub-id>
          <pub-id pub-id-type="pmcid">PMC3833808</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref66">
        <label>66</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Skodda</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Aspects of speech rate and regularity in Parkinson's disease</article-title>
          <source>J Neurol Sci</source>
          <year>2011</year>
          <month>11</month>
          <day>15</day>
          <volume>310</volume>
          <issue>1-2</issue>
          <fpage>231</fpage>
          <lpage>6</lpage>
          <pub-id pub-id-type="doi">10.1016/j.jns.2011.07.020</pub-id>
          <pub-id pub-id-type="medline">21849174</pub-id>
          <pub-id pub-id-type="pii">S0022-510X(11)00437-0</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref67">
        <label>67</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pommée</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Balaguer</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Pinquier</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Mauclair</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Woisard</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Speyer</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Relationship between phoneme-level spectral acoustics and speech intelligibility in healthy speech: a systematic review</article-title>
          <source>Speech Lang Hear</source>
          <year>2021</year>
          <month>04</month>
          <day>17</day>
          <volume>24</volume>
          <issue>2</issue>
          <fpage>105</fpage>
          <lpage>32</lpage>
          <pub-id pub-id-type="doi">10.1080/2050571X.2021.1913300</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref68">
        <label>68</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jadoul</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Thompson</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>de Boer</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Introducing parselmouth: a python interface to praat</article-title>
          <source>J Phon</source>
          <year>2018</year>
          <month>11</month>
          <volume>71</volume>
          <fpage>1</fpage>
          <lpage>15</lpage>
          <pub-id pub-id-type="doi">10.1016/j.wocn.2018.07.001</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref69">
        <label>69</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cummins</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Baird</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Schuller</surname>
              <given-names>BW</given-names>
            </name>
          </person-group>
          <article-title>Speech analysis for health: current state-of-the-art and the increasing impact of deep learning</article-title>
          <source>Methods</source>
          <year>2018</year>
          <month>12</month>
          <day>01</day>
          <volume>151</volume>
          <fpage>41</fpage>
          <lpage>54</lpage>
          <pub-id pub-id-type="doi">10.1016/j.ymeth.2018.07.007</pub-id>
          <pub-id pub-id-type="medline">30099083</pub-id>
          <pub-id pub-id-type="pii">S1046-2023(17)30371-7</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref70">
        <label>70</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Brockmann</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Drinnan</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Storck</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Carding</surname>
              <given-names>PN</given-names>
            </name>
          </person-group>
          <article-title>Reliable jitter and shimmer measurements in voice clinics: the relevance of vowel, gender, vocal intensity, and fundamental frequency effects in a typical clinical task</article-title>
          <source>J Voice</source>
          <year>2011</year>
          <month>01</month>
          <volume>25</volume>
          <issue>1</issue>
          <fpage>44</fpage>
          <lpage>53</lpage>
          <pub-id pub-id-type="doi">10.1016/j.jvoice.2009.07.002</pub-id>
          <pub-id pub-id-type="medline">20381308</pub-id>
          <pub-id pub-id-type="pii">S0892-1997(09)00110-6</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref71">
        <label>71</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Radford</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>JW</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Brockman</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>McLeavey</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Sutskever</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>Robust speech recognition via large-scale weak supervision</article-title>
          <source>Proceedings of the 40th International Conference on Machine Learning</source>
          <year>2023</year>
          <conf-name>ICML '23</conf-name>
          <conf-date>July 23-29, 2023</conf-date>
          <conf-loc>Honolulu, HI</conf-loc>
          <fpage>28492</fpage>
          <lpage>518</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dl.acm.org/doi/10.5555/3618408.3619590"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref72">
        <label>72</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Peng</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Sudo</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Shakeel</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Watanabe</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>OWSM-CTC: an open encoder-only speech foundation model for speech recognition, translation, and language identification</article-title>
          <source>Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics</source>
          <year>2024</year>
          <conf-name>ACL '24</conf-name>
          <conf-date>August 11-16, 2024</conf-date>
          <conf-loc>Bangkok, Thailand</conf-loc>
          <fpage>10192</fpage>
          <lpage>209</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aclanthology.org/2024.acl-long.549.pdf"/>
          </comment>
          <pub-id pub-id-type="doi">10.18653/v1/2024.acl-long.549</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref73">
        <label>73</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>McAuliffe</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Socolof</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Mihuc</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Wagner</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Sonderegger</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Montreal forced aligner: trainable text-speech alignment using Kaldi</article-title>
          <source>Proc Interspeech</source>
          <year>2017</year>
          <volume>86</volume>
          <issue>1</issue>
          <fpage>498</fpage>
          <lpage>502</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.isca-archive.org/interspeech_2017/mcauliffe17_interspeech.html#"/>
          </comment>
          <pub-id pub-id-type="doi">10.21437/interspeech.2017-1386</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref74">
        <label>74</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gonzalez</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Grama</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Travis</surname>
              <given-names>CE</given-names>
            </name>
          </person-group>
          <article-title>Comparing the performance of forced aligners used in sociophonetic research</article-title>
          <source>Linguist Vanguard</source>
          <year>2020</year>
          <month>04</month>
          <day>18</day>
          <volume>6</volume>
          <issue>1</issue>
          <fpage>20190058</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.degruyterbrill.com/document/doi/10.1515/lingvan-2019-0058/html"/>
          </comment>
          <pub-id pub-id-type="doi">10.1515/lingvan-2019-0058</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref75">
        <label>75</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mackenzie</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Turton</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Assessing the accuracy of existing forced alignment software on varieties of British English</article-title>
          <source>Linguist Vanguard</source>
          <year>2020</year>
          <month>01</month>
          <day>29</day>
          <volume>6</volume>
          <issue>1</issue>
          <fpage>20180061</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.degruyterbrill.com/document/doi/10.1515/lingvan-2018-0061/html"/>
          </comment>
          <pub-id pub-id-type="doi">10.1515/lingvan-2018-0061</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref76">
        <label>76</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Boersma</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Weenink</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Doing phonetics by computer</article-title>
          <source>Praat</source>
          <year>2025</year>
          <access-date>2022-11-29</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.praat.org">http://www.praat.org</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref77">
        <label>77</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>de Jong</surname>
              <given-names>NH</given-names>
            </name>
            <name name-style="western">
              <surname>Pacilly</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Heeren</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>PRAAT scripts to measure speed fluency and breakdown fluency in speech automatically</article-title>
          <source>Assess Educ Princ Policy Pract</source>
          <year>2021</year>
          <month>07</month>
          <day>25</day>
          <volume>28</volume>
          <issue>4</issue>
          <fpage>456</fpage>
          <lpage>76</lpage>
          <pub-id pub-id-type="doi">10.1080/0969594x.2021.1951162</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref78">
        <label>78</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Murton</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Hillman</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Mehta</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Cepstral peak prominence values for clinical voice evaluation</article-title>
          <source>Am J Speech Lang Pathol</source>
          <year>2020</year>
          <month>08</month>
          <day>04</day>
          <volume>29</volume>
          <issue>3</issue>
          <fpage>1596</fpage>
          <lpage>607</lpage>
          <pub-id pub-id-type="doi">10.1044/2020_ajslp-20-00001</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref79">
        <label>79</label>
        <nlm-citation citation-type="web">
          <article-title>Principles relating to processing of personal data</article-title>
          <source>General Data Protection Regulation (GDPR)</source>
          <access-date>2025-03-13</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://gdpr-info.eu/art-5-gdpr/">https://gdpr-info.eu/art-5-gdpr/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref80">
        <label>80</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ramanarayanan</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Lammert</surname>
              <given-names>AC</given-names>
            </name>
            <name name-style="western">
              <surname>Rowe</surname>
              <given-names>HP</given-names>
            </name>
            <name name-style="western">
              <surname>Quatieri</surname>
              <given-names>TF</given-names>
            </name>
            <name name-style="western">
              <surname>Green</surname>
              <given-names>JR</given-names>
            </name>
          </person-group>
          <article-title>Speech as a biomarker: opportunities, interpretability, and challenges</article-title>
          <source>Perspect ASHA Spec Interest Groups</source>
          <year>2022</year>
          <month>02</month>
          <day>11</day>
          <volume>7</volume>
          <issue>1</issue>
          <fpage>276</fpage>
          <lpage>83</lpage>
          <pub-id pub-id-type="doi">10.1044/2021_persp-21-00174</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref81">
        <label>81</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ranjan</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Rashid</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Stewart</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Conde</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Begale</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Verbeeck</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Boettcher</surname>
              <given-names>S</given-names>
            </name>
            <collab>Hyve</collab>
            <name name-style="western">
              <surname>Dobson</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Folarin</surname>
              <given-names>A</given-names>
            </name>
            <collab>RADAR-CNS Consortium</collab>
          </person-group>
          <article-title>RADAR-base: open source mobile health platform for collecting, monitoring, and analyzing data using sensors, wearables, and mobile devices</article-title>
          <source>JMIR Mhealth Uhealth</source>
          <year>2019</year>
          <month>08</month>
          <day>01</day>
          <volume>7</volume>
          <issue>8</issue>
          <fpage>e11734</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://mhealth.jmir.org/2019/8/e11734/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/11734</pub-id>
          <pub-id pub-id-type="medline">31373275</pub-id>
          <pub-id pub-id-type="pii">v7i8e11734</pub-id>
          <pub-id pub-id-type="pmcid">PMC6694732</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref82">
        <label>82</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Jepson</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Lohfink</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Arvaniti</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Comparing acoustic analyses of speech data collected remotely</article-title>
          <source>J Acoust Soc Am</source>
          <year>2021</year>
          <month>06</month>
          <volume>149</volume>
          <issue>6</issue>
          <fpage>3910</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/34241427"/>
          </comment>
          <pub-id pub-id-type="doi">10.1121/10.0005132</pub-id>
          <pub-id pub-id-type="medline">34241427</pub-id>
          <pub-id pub-id-type="pmcid">PMC8269758</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref83">
        <label>83</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Georgalas</surname>
              <given-names>VL</given-names>
            </name>
            <name name-style="western">
              <surname>Kalantzi</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Harpur</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Kenny</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>The effects of caffeine on voice: a systematic review</article-title>
          <source>J Voice</source>
          <year>2023</year>
          <month>07</month>
          <volume>37</volume>
          <issue>4</issue>
          <fpage>636.e7</fpage>
          <lpage>19</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S0892-1997(21)00084-9"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jvoice.2021.02.025</pub-id>
          <pub-id pub-id-type="medline">33752928</pub-id>
          <pub-id pub-id-type="pii">S0892-1997(21)00084-9</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref84">
        <label>84</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Schuller</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Steidl</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Batliner</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Schiel</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Krajewski</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Weninger</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Eyben</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Medium-term speaker states—a review on intoxication, sleepiness and the first challenge</article-title>
          <source>Comput Speech Lang</source>
          <year>2014</year>
          <month>03</month>
          <volume>28</volume>
          <issue>2</issue>
          <fpage>346</fpage>
          <lpage>74</lpage>
          <pub-id pub-id-type="doi">10.1016/j.csl.2012.12.002</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref85">
        <label>85</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nemr</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Di Carlos Silva</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Rodrigues</surname>
              <given-names>DD</given-names>
            </name>
            <name name-style="western">
              <surname>Zenari</surname>
              <given-names>MS</given-names>
            </name>
          </person-group>
          <article-title>Medications and adverse voice effects</article-title>
          <source>J Voice</source>
          <year>2018</year>
          <month>07</month>
          <volume>32</volume>
          <issue>4</issue>
          <fpage>515.e29</fpage>
          <lpage>39</lpage>
          <pub-id pub-id-type="doi">10.1016/j.jvoice.2017.07.009</pub-id>
          <pub-id pub-id-type="medline">28822620</pub-id>
          <pub-id pub-id-type="pii">S0892-1997(17)30091-7</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref86">
        <label>86</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fusaroli</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Simonsen</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Borrie</surname>
              <given-names>SA</given-names>
            </name>
            <name name-style="western">
              <surname>Low</surname>
              <given-names>DM</given-names>
            </name>
            <name name-style="western">
              <surname>Parola</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Raschi</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Poluzzi</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Fusaroli</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Identifying medications underlying communication atypicalities in psychotic and affective disorders: a pharmacovigilance study within the FDA adverse event reporting system</article-title>
          <source>J Speech Lang Hear Res</source>
          <year>2023</year>
          <month>09</month>
          <day>13</day>
          <volume>66</volume>
          <issue>9</issue>
          <fpage>3242</fpage>
          <lpage>59</lpage>
          <pub-id pub-id-type="doi">10.1044/2023_jslhr-22-00739</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref87">
        <label>87</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Eyben</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Weninger</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Gross</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Schuller</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Recent developments in openSMILE, the Munich open-source multimedia feature extractor</article-title>
          <source>Proceedings of the 21st ACM international conference on Multimedia</source>
          <year>2013</year>
          <conf-name>MM '13</conf-name>
          <conf-date>October 21-25, 2013</conf-date>
          <conf-loc>Barcelona, Spain</conf-loc>
          <fpage>835</fpage>
          <lpage>8</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dl.acm.org/doi/10.1145/2502081.2502224"/>
          </comment>
          <pub-id pub-id-type="doi">10.1145/2502081.2502224</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref88">
        <label>88</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Eyben</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Scherer</surname>
              <given-names>KR</given-names>
            </name>
            <name name-style="western">
              <surname>Schuller</surname>
              <given-names>BW</given-names>
            </name>
            <name name-style="western">
              <surname>Sundberg</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Andre</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Busso</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Devillers</surname>
              <given-names>LY</given-names>
            </name>
            <name name-style="western">
              <surname>Epps</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Laukka</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Narayanan</surname>
              <given-names>SS</given-names>
            </name>
            <name name-style="western">
              <surname>Truong</surname>
              <given-names>KP</given-names>
            </name>
          </person-group>
          <article-title>The Geneva minimalistic acoustic parameter set (GeMAPS) for voice research and affective computing</article-title>
          <source>IEEE Trans Affective Comput</source>
          <year>2016</year>
          <month>04</month>
          <day>01</day>
          <volume>7</volume>
          <issue>2</issue>
          <fpage>190</fpage>
          <lpage>202</lpage>
          <pub-id pub-id-type="doi">10.1109/taffc.2015.2457417</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref89">
        <label>89</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Larsen</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Murton</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Song</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Joachim</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Watts</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Kapczinski</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Venesky</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Hurowitz</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Validating the efficacy and value proposition of mental fitness vocal biomarkers in a psychiatric population: prospective cohort study</article-title>
          <source>Front Psychiatry</source>
          <year>2024</year>
          <month>03</month>
          <day>05</day>
          <volume>15</volume>
          <fpage>1342835</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/38505797"/>
          </comment>
          <pub-id pub-id-type="doi">10.3389/fpsyt.2024.1342835</pub-id>
          <pub-id pub-id-type="medline">38505797</pub-id>
          <pub-id pub-id-type="pmcid">PMC10948552</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref90">
        <label>90</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lenain</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Weston</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Shivkumar</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Fristed</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Surfboard: audio feature extraction for modern machine learning</article-title>
          <source>Proc Interspeech</source>
          <year>2020</year>
          <volume>33</volume>
          <fpage>2917</fpage>
          <lpage>21</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.isca-archive.org/interspeech_2020/lenain20_interspeech.html#"/>
          </comment>
          <pub-id pub-id-type="doi">10.21437/interspeech.2020-2879</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref91">
        <label>91</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Russell</surname>
              <given-names>SO</given-names>
            </name>
            <name name-style="western">
              <surname>Gessinger</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Krason</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Vigliocco</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Harte</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>What automatic speech recognition can and cannot do for conversational speech transcription</article-title>
          <source>Res Methods Appl Linguist</source>
          <year>2024</year>
          <month>12</month>
          <volume>3</volume>
          <issue>3</issue>
          <fpage>100163</fpage>
          <pub-id pub-id-type="doi">10.1016/j.rmal.2024.100163</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref92">
        <label>92</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Botelho</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Abad</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Schultz</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Trancoso</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>Speech as a biomarker for disease detection</article-title>
          <source>IEEE Access</source>
          <year>2024</year>
          <volume>12</volume>
          <fpage>184487</fpage>
          <lpage>508</lpage>
          <pub-id pub-id-type="doi">10.1109/access.2024.3506433</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref93">
        <label>93</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Brown</surname>
              <given-names>VA</given-names>
            </name>
          </person-group>
          <article-title>An introduction to linear mixed-effects modeling in R</article-title>
          <source>Adv Methods Pract Psychol Sci</source>
          <year>2021</year>
          <month>03</month>
          <day>25</day>
          <volume>4</volume>
          <issue>1</issue>
          <fpage>35</fpage>
          <pub-id pub-id-type="doi">10.1177/2515245920960351</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref94">
        <label>94</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bagiella</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Sloan</surname>
              <given-names>RP</given-names>
            </name>
            <name name-style="western">
              <surname>Heitjan</surname>
              <given-names>DF</given-names>
            </name>
          </person-group>
          <article-title>Mixed-effects models in psychophysiology</article-title>
          <source>Psychophysiology</source>
          <year>2003</year>
          <month>03</month>
          <day>19</day>
          <volume>37</volume>
          <issue>1</issue>
          <fpage>13</fpage>
          <lpage>20</lpage>
          <pub-id pub-id-type="doi">10.1111/1469-8986.3710013</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref95">
        <label>95</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dineley</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Tamaris</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Pan</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>White</surname>
              <given-names>LL</given-names>
            </name>
            <name name-style="western">
              <surname>Rahman</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Lucas</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Carr</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Matcham</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Downs</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Dobson</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Cummins</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>Towards robust protocols for longitudinal mHealth speech analysis in mental health: an investigation of practice effects</article-title>
          <source>Proceedings of the 2nd International Digital Mental Health &#38; Wellbeing Conference</source>
          <year>2024</year>
          <conf-name>2nd International Digital Mental Health &#38; Wellbeing Conference</conf-name>
          <conf-date>June 19-21, 2024</conf-date>
          <conf-loc>Derry-Londonderry, Ireland</conf-loc>
          <publisher-name>Ulster University</publisher-name>
          <fpage>2373</fpage>
          <lpage>7</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://pure.ulster.ac.uk/en/publications/dmhw-conference-proceedings-2024"/>
          </comment>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
