@inproceedings{espana-bonet-barron-cedeno-2024-elote,
title = "Elote, Choclo and Mazorca: on the Varieties of {S}panish",
author = "Espa{\~n}a-Bonet, Cristina and
Barr{\'o}n-Cede{\~n}o, Alberto",
editor = "Duh, Kevin and
Gomez, Helena and
Bethard, Steven",
booktitle = "Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers)",
month = jun,
year = "2024",
address = "Mexico City, Mexico",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.naacl-long.204",
pages = "3689--3711",
abstract = "Spanish is one of the most widespread languages: the official language in 20 countries and the second most-spoken native language. Its contact with other languages across different regions and the rich regional and cultural diversity has produced varieties which divert from each other, particularly in terms of lexicon. Still, available corpora, and models trained upon them, generally treat Spanish as one monolithic language, which dampers prediction and generation power when dealing with different varieties. To alleviate the situation, we compile and curate datasets in the different varieties of Spanish around the world at an unprecedented scale and create the CEREAL corpus. With such a resource at hand, we perform a stylistic analysis to identify and characterise varietal differences. We implement a classifier specially designed to deal with long documents and identify Spanish varieties (and therefore expand CEREAL further). We produce varietal-specific embeddings, and analyse the cultural differences that they encode. We make data, code and models publicly available.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="espana-bonet-barron-cedeno-2024-elote">
<titleInfo>
<title>Elote, Choclo and Mazorca: on the Varieties of Spanish</title>
</titleInfo>
<name type="personal">
<namePart type="given">Cristina</namePart>
<namePart type="family">España-Bonet</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alberto</namePart>
<namePart type="family">Barrón-Cedeño</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kevin</namePart>
<namePart type="family">Duh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Helena</namePart>
<namePart type="family">Gomez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Steven</namePart>
<namePart type="family">Bethard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Mexico City, Mexico</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Spanish is one of the most widespread languages: the official language in 20 countries and the second most-spoken native language. Its contact with other languages across different regions and the rich regional and cultural diversity has produced varieties which divert from each other, particularly in terms of lexicon. Still, available corpora, and models trained upon them, generally treat Spanish as one monolithic language, which dampers prediction and generation power when dealing with different varieties. To alleviate the situation, we compile and curate datasets in the different varieties of Spanish around the world at an unprecedented scale and create the CEREAL corpus. With such a resource at hand, we perform a stylistic analysis to identify and characterise varietal differences. We implement a classifier specially designed to deal with long documents and identify Spanish varieties (and therefore expand CEREAL further). We produce varietal-specific embeddings, and analyse the cultural differences that they encode. We make data, code and models publicly available.</abstract>
<identifier type="citekey">espana-bonet-barron-cedeno-2024-elote</identifier>
<location>
<url>https://aclanthology.org/2024.naacl-long.204</url>
</location>
<part>
<date>2024-06</date>
<extent unit="page">
<start>3689</start>
<end>3711</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Elote, Choclo and Mazorca: on the Varieties of Spanish
%A España-Bonet, Cristina
%A Barrón-Cedeño, Alberto
%Y Duh, Kevin
%Y Gomez, Helena
%Y Bethard, Steven
%S Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers)
%D 2024
%8 June
%I Association for Computational Linguistics
%C Mexico City, Mexico
%F espana-bonet-barron-cedeno-2024-elote
%X Spanish is one of the most widespread languages: the official language in 20 countries and the second most-spoken native language. Its contact with other languages across different regions and the rich regional and cultural diversity has produced varieties which divert from each other, particularly in terms of lexicon. Still, available corpora, and models trained upon them, generally treat Spanish as one monolithic language, which dampers prediction and generation power when dealing with different varieties. To alleviate the situation, we compile and curate datasets in the different varieties of Spanish around the world at an unprecedented scale and create the CEREAL corpus. With such a resource at hand, we perform a stylistic analysis to identify and characterise varietal differences. We implement a classifier specially designed to deal with long documents and identify Spanish varieties (and therefore expand CEREAL further). We produce varietal-specific embeddings, and analyse the cultural differences that they encode. We make data, code and models publicly available.
%U https://aclanthology.org/2024.naacl-long.204
%P 3689-3711
Markdown (Informal)
[Elote, Choclo and Mazorca: on the Varieties of Spanish](https://aclanthology.org/2024.naacl-long.204) (España-Bonet & Barrón-Cedeño, NAACL 2024)
ACL
- Cristina España-Bonet and Alberto Barrón-Cedeño. 2024. Elote, Choclo and Mazorca: on the Varieties of Spanish. In Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers), pages 3689–3711, Mexico City, Mexico. Association for Computational Linguistics.