% Conference paper in the LREC-COLING 2024 proceedings (record hosted at aclanthology.org).
% Spanish (title) and LREC-COLING (booktitle) are brace-protected as whole words so that
% styles which sentence-case titles keep their capitals without mid-word brace breaks.
@inproceedings{perez-enriquez-etal-2024-automatic-punctuation,
  title     = {Automatic Punctuation Model for {Spanish} Live Transcriptions},
  author    = {Perez-Enriquez, Mario and
               Masiello-Ruiz, Jose Manuel and
               Lopez-Cuadrado, Jose Luis and
               Gonzalez-Carrasco, Israel and
               Martinez-Fernandez, Paloma and
               Ruiz-Mezcua, Belen},
  editor    = {Calzolari, Nicoletta and
               Kan, Min-Yen and
               Hoste, Veronique and
               Lenci, Alessandro and
               Sakti, Sakriani and
               Xue, Nianwen},
  booktitle = {Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation ({LREC-COLING} 2024)},
  month     = may,
  year      = {2024},
  address   = {Torino, Italia},
  publisher = {ELRA and ICCL},
  url       = {https://aclanthology.org/2024.lrec-main.175},
  pages     = {1953--1958},
  abstract  = {With the widespread adoption of automatic transcription tools, acquiring speech transcriptions within seconds has become a reality. Nonetheless, many of these tools yield unpunctuated outputs, potentially incurring additional costs. This paper presents a novel approach to integrating punctuation into the transcriptions generated by such automatic tools, specifically focusing on Spanish-speaking contexts. Leveraging the RoBERTa-bne model pre-trained with data from the Spanish National Library, our training proposal is augmented with additional corpora to enhance performance on less common punctuation marks, such as question marks. Also, the proposed model has been trained through fine-tuning pre-trained models, involving adjustments for token classification and using SoftMax to identify the highest probability token. The proposed model obtains promising results when compared with other Spanish reference paper models. Ultimately, this model aims to facilitate punctuation on live transcriptions seamlessly and accurately. The proposed model will be applied to a real-case education project to improve the readability of the transcriptions.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for a single conference paper; same citekey as the BibTeX entry. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="perez-enriquez-etal-2024-automatic-punctuation">

    <!-- Paper title -->
    <titleInfo>
      <title>Automatic Punctuation Model for Spanish Live Transcriptions</title>
    </titleInfo>

    <!-- Authors, in document order -->
    <name type="personal">
      <namePart type="given">Mario</namePart>
      <namePart type="family">Perez-Enriquez</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Jose</namePart>
      <namePart type="given">Manuel</namePart>
      <namePart type="family">Masiello-Ruiz</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Jose</namePart>
      <namePart type="given">Luis</namePart>
      <namePart type="family">Lopez-Cuadrado</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Israel</namePart>
      <namePart type="family">Gonzalez-Carrasco</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Paloma</namePart>
      <namePart type="family">Martinez-Fernandez</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Belen</namePart>
      <namePart type="family">Ruiz-Mezcua</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>

    <!-- Issue date of the paper (year-month) -->
    <originInfo>
      <dateIssued>2024-05</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>

    <!-- Host item: the proceedings volume, its editors, publisher and place -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Nicoletta</namePart>
        <namePart type="family">Calzolari</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Min-Yen</namePart>
        <namePart type="family">Kan</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Veronique</namePart>
        <namePart type="family">Hoste</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Alessandro</namePart>
        <namePart type="family">Lenci</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Sakriani</namePart>
        <namePart type="family">Sakti</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Nianwen</namePart>
        <namePart type="family">Xue</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>ELRA and ICCL</publisher>
        <place>
          <placeTerm type="text">Torino, Italia</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>

    <abstract>With the widespread adoption of automatic transcription tools, acquiring speech transcriptions within seconds has become a reality. Nonetheless, many of these tools yield unpunctuated outputs, potentially incurring additional costs. This paper presents a novel approach to integrating punctuation into the transcriptions generated by such automatic tools, specifically focusing on Spanish-speaking contexts. Leveraging the RoBERTa-bne model pre-trained with data from the Spanish National Library, our training proposal is augmented with additional corpora to enhance performance on less common punctuation marks, such as question marks. Also, the proposed model has been trained through fine-tuning pre-trained models, involving adjustments for token classification and using SoftMax to identify the highest probability token. The proposed model obtains promising results when compared with other Spanish reference paper models. Ultimately, this model aims to facilitate punctuation on live transcriptions seamlessly and accurately. The proposed model will be applied to a real-case education project to improve the readability of the transcriptions.</abstract>

    <!-- Identifiers and location of the record -->
    <identifier type="citekey">perez-enriquez-etal-2024-automatic-punctuation</identifier>
    <location>
      <url>https://aclanthology.org/2024.lrec-main.175</url>
    </location>

    <!-- Position of the paper within the host volume -->
    <part>
      <date>2024-05</date>
      <extent unit="page">
        <start>1953</start>
        <end>1958</end>
      </extent>
    </part>

  </mods>
</modsCollection>
%0 Conference Proceedings
%T Automatic Punctuation Model for Spanish Live Transcriptions
%A Perez-Enriquez, Mario
%A Masiello-Ruiz, Jose Manuel
%A Lopez-Cuadrado, Jose Luis
%A Gonzalez-Carrasco, Israel
%A Martinez-Fernandez, Paloma
%A Ruiz-Mezcua, Belen
%Y Calzolari, Nicoletta
%Y Kan, Min-Yen
%Y Hoste, Veronique
%Y Lenci, Alessandro
%Y Sakti, Sakriani
%Y Xue, Nianwen
%S Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)
%D 2024
%8 May
%I ELRA and ICCL
%C Torino, Italia
%F perez-enriquez-etal-2024-automatic-punctuation
%X With the widespread adoption of automatic transcription tools, acquiring speech transcriptions within seconds has become a reality. Nonetheless, many of these tools yield unpunctuated outputs, potentially incurring additional costs. This paper presents a novel approach to integrating punctuation into the transcriptions generated by such automatic tools, specifically focusing on Spanish-speaking contexts. Leveraging the RoBERTa-bne model pre-trained with data from the Spanish National Library, our training proposal is augmented with additional corpora to enhance performance on less common punctuation marks, such as question marks. Also, the proposed model has been trained through fine-tuning pre-trained models, involving adjustments for token classification and using SoftMax to identify the highest probability token. The proposed model obtains promising results when compared with other Spanish reference paper models. Ultimately, this model aims to facilitate punctuation on live transcriptions seamlessly and accurately. The proposed model will be applied to a real-case education project to improve the readability of the transcriptions.
%U https://aclanthology.org/2024.lrec-main.175
%P 1953-1958
Markdown (Informal)
[Automatic Punctuation Model for Spanish Live Transcriptions](https://aclanthology.org/2024.lrec-main.175) (Perez-Enriquez et al., LREC-COLING 2024)
ACL
- Mario Perez-Enriquez, Jose Manuel Masiello-Ruiz, Jose Luis Lopez-Cuadrado, Israel Gonzalez-Carrasco, Paloma Martinez-Fernandez, and Belen Ruiz-Mezcua. 2024. Automatic Punctuation Model for Spanish Live Transcriptions. In Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024), pages 1953–1958, Torino, Italia. ELRA and ICCL.