@inproceedings{zhang-etal-2024-voice,
title = "Voice Activity Detection on {I}talian Language",
author = "Zhang, Shibingfeng and
Gagliardi, Gloria and
Tamburini, Fabio",
editor = "Dell'Orletta, Felice and
Lenci, Alessandro and
Montemagni, Simonetta and
Sprugnoli, Rachele",
booktitle = "Proceedings of the 10th Italian Conference on Computational Linguistics (CLiC-it 2024)",
month = dec,
year = "2024",
address = "Pisa, Italy",
publisher = "CEUR Workshop Proceedings",
url = "https://aclanthology.org/2024.clicit-1.111/",
pages = "1024--1029",
ISBN = "979-12-210-7060-6",
abstract = "Voice Activity Detection (VAD) refers to the task of identifying human voice activity in noisy settings, playing a crucial role in fields like speech recognition and audio surveillance. However, most VAD research focuses on English, leaving other languages, such as Italian, under-explored. This study aims to evaluate and enhance VAD systems for Italian speech, with the goal of finding a solution for the speech segmentation component of the Digital Linguistic Biomarkers (DLBs) extraction pipeline for early mental disorder diagnosis. We experimented with various VAD systems and propose an ensemble VAD system that integrates the best-performing models. Our ensemble system shows significant improvements in speech event detection. This advancement lays a robust foundation for more accurate early detection of mental health issues using DLBs in Italian."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zhang-etal-2024-voice">
<titleInfo>
<title>Voice Activity Detection on Italian Language</title>
</titleInfo>
<name type="personal">
<namePart type="given">Shibingfeng</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gloria</namePart>
<namePart type="family">Gagliardi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fabio</namePart>
<namePart type="family">Tamburini</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 10th Italian Conference on Computational Linguistics (CLiC-it 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Felice</namePart>
<namePart type="family">Dell’Orletta</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alessandro</namePart>
<namePart type="family">Lenci</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Simonetta</namePart>
<namePart type="family">Montemagni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rachele</namePart>
<namePart type="family">Sprugnoli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>CEUR Workshop Proceedings</publisher>
<place>
<placeTerm type="text">Pisa, Italy</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-12-210-7060-6</identifier>
</relatedItem>
<abstract>Voice Activity Detection (VAD) refers to the task of identifying human voice activity in noisy settings, playing a crucial role in fields like speech recognition and audio surveillance. However, most VAD research focuses on English, leaving other languages, such as Italian, under-explored. This study aims to evaluate and enhance VAD systems for Italian speech, with the goal of finding a solution for the speech segmentation component of the Digital Linguistic Biomarkers (DLBs) extraction pipeline for early mental disorder diagnosis. We experimented with various VAD systems and propose an ensemble VAD system that integrates the best-performing models. Our ensemble system shows significant improvements in speech event detection. This advancement lays a robust foundation for more accurate early detection of mental health issues using DLBs in Italian.</abstract>
<identifier type="citekey">zhang-etal-2024-voice</identifier>
<location>
<url>https://aclanthology.org/2024.clicit-1.111/</url>
</location>
<part>
<date>2024-12</date>
<extent unit="page">
<start>1024</start>
<end>1029</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Voice Activity Detection on Italian Language
%A Zhang, Shibingfeng
%A Gagliardi, Gloria
%A Tamburini, Fabio
%Y Dell’Orletta, Felice
%Y Lenci, Alessandro
%Y Montemagni, Simonetta
%Y Sprugnoli, Rachele
%S Proceedings of the 10th Italian Conference on Computational Linguistics (CLiC-it 2024)
%D 2024
%8 December
%I CEUR Workshop Proceedings
%C Pisa, Italy
%@ 979-12-210-7060-6
%F zhang-etal-2024-voice
%X Voice Activity Detection (VAD) refers to the task of identifying human voice activity in noisy settings, playing a crucial role in fields like speech recognition and audio surveillance. However, most VAD research focuses on English, leaving other languages, such as Italian, under-explored. This study aims to evaluate and enhance VAD systems for Italian speech, with the goal of finding a solution for the speech segmentation component of the Digital Linguistic Biomarkers (DLBs) extraction pipeline for early mental disorder diagnosis. We experimented with various VAD systems and propose an ensemble VAD system that integrates the best-performing models. Our ensemble system shows significant improvements in speech event detection. This advancement lays a robust foundation for more accurate early detection of mental health issues using DLBs in Italian.
%U https://aclanthology.org/2024.clicit-1.111/
%P 1024-1029
Markdown (Informal)
[Voice Activity Detection on Italian Language](https://aclanthology.org/2024.clicit-1.111/) (Zhang et al., CLiC-it 2024)
ACL
- Shibingfeng Zhang, Gloria Gagliardi, and Fabio Tamburini. 2024. Voice Activity Detection on Italian Language. In Proceedings of the 10th Italian Conference on Computational Linguistics (CLiC-it 2024), pages 1024–1029, Pisa, Italy. CEUR Workshop Proceedings.