% Conference paper from the CLiC-it 2024 proceedings (ACL Anthology ID 2024.clicit-1.42).
@inproceedings{fedotova-etal-2024-constructing,
    title = "Constructing a Multimodal, Multilingual Translation and Interpreting Corpus: A Modular Pipeline and an Evaluation of {ASR} for Verbatim Transcription",
    author = "Fedotova, Alice and
      Ferraresi, Adriano and
      Mili{\v{c}}evi{\'c} Petrovi{\'c}, Maja and
      Barr{\'o}n-Cede{\~n}o, Alberto",
    editor = "Dell'Orletta, Felice and
      Lenci, Alessandro and
      Montemagni, Simonetta and
      Sprugnoli, Rachele",
    booktitle = "Proceedings of the 10th Italian Conference on Computational Linguistics (CLiC-it 2024)",
    month = dec,
    year = "2024",
    address = "Pisa, Italy",
    publisher = "CEUR Workshop Proceedings",
    url = "https://aclanthology.org/2024.clicit-1.42/",
    pages = "349--355",
    ISBN = "979-12-210-7060-6",
    abstract = "This paper presents a novel pipeline for constructing multimodal and multilingual parallel corpora, with a focus on evaluating state-of-the-art ASR tools for verbatim transcription. Our findings indicate that current technologies can streamline corpus construction, with fine-tuning showing promising results in terms of transcription quality compared to out-of-the-box Whisper models. The lowest overall WER achieved for English was 0.180, using a fine-tuned Whisper-small model. As for Italian, the fine-tuned Whisper-small model obtained a lower WER of 0.201 compared to the baseline Whisper-small's WER of 0.219. While limitations remain, the updated pipeline is expected to drastically reduce the human efforts involved."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="fedotova-etal-2024-constructing">
<titleInfo>
<title>Constructing a Multimodal, Multilingual Translation and Interpreting Corpus: A Modular Pipeline and an Evaluation of ASR for Verbatim Transcription</title>
</titleInfo>
<name type="personal">
<namePart type="given">Alice</namePart>
<namePart type="family">Fedotova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Adriano</namePart>
<namePart type="family">Ferraresi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maja</namePart>
<namePart type="family">Miličević Petrović</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alberto</namePart>
<namePart type="family">Barrón-Cedeño</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 10th Italian Conference on Computational Linguistics (CLiC-it 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Felice</namePart>
<namePart type="family">Dell’Orletta</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alessandro</namePart>
<namePart type="family">Lenci</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Simonetta</namePart>
<namePart type="family">Montemagni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rachele</namePart>
<namePart type="family">Sprugnoli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>CEUR Workshop Proceedings</publisher>
<place>
<placeTerm type="text">Pisa, Italy</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-12-210-7060-6</identifier>
</relatedItem>
<abstract>This paper presents a novel pipeline for constructing multimodal and multilingual parallel corpora, with a focus on evaluating state-of-the-art ASR tools for verbatim transcription. Our findings indicate that current technologies can streamline corpus construction, with fine-tuning showing promising results in terms of transcription quality compared to out-of-the-box Whisper models. The lowest overall WER achieved for English was 0.180, using a fine-tuned Whisper-small model. As for Italian, the fine-tuned Whisper-small model obtained a lower WER of 0.201 compared to the baseline Whisper-small’s WER of 0.219. While limitations remain, the updated pipeline is expected to drastically reduce the human efforts involved.</abstract>
<identifier type="citekey">fedotova-etal-2024-constructing</identifier>
<location>
<url>https://aclanthology.org/2024.clicit-1.42/</url>
</location>
<part>
<date>2024-12</date>
<extent unit="page">
<start>349</start>
<end>355</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Constructing a Multimodal, Multilingual Translation and Interpreting Corpus: A Modular Pipeline and an Evaluation of ASR for Verbatim Transcription
%A Fedotova, Alice
%A Ferraresi, Adriano
%A Miličević Petrović, Maja
%A Barrón-Cedeño, Alberto
%Y Dell’Orletta, Felice
%Y Lenci, Alessandro
%Y Montemagni, Simonetta
%Y Sprugnoli, Rachele
%S Proceedings of the 10th Italian Conference on Computational Linguistics (CLiC-it 2024)
%D 2024
%8 December
%I CEUR Workshop Proceedings
%C Pisa, Italy
%@ 979-12-210-7060-6
%F fedotova-etal-2024-constructing
%X This paper presents a novel pipeline for constructing multimodal and multilingual parallel corpora, with a focus on evaluating state-of-the-art ASR tools for verbatim transcription. Our findings indicate that current technologies can streamline corpus construction, with fine-tuning showing promising results in terms of transcription quality compared to out-of-the-box Whisper models. The lowest overall WER achieved for English was 0.180, using a fine-tuned Whisper-small model. As for Italian, the fine-tuned Whisper-small model obtained a lower WER of 0.201 compared to the baseline Whisper-small’s WER of 0.219. While limitations remain, the updated pipeline is expected to drastically reduce the human efforts involved.
%U https://aclanthology.org/2024.clicit-1.42/
%P 349-355
Markdown (Informal)
[Constructing a Multimodal, Multilingual Translation and Interpreting Corpus: A Modular Pipeline and an Evaluation of ASR for Verbatim Transcription](https://aclanthology.org/2024.clicit-1.42/) (Fedotova et al., CLiC-it 2024)
ACL