@inproceedings{bel-etal-2024-escola-spanish,
title = "{E}s{C}o{LA}: {S}panish Corpus of Linguistic Acceptability",
author = "Bel, Nuria and
Punsola, Marta and
Ru{\'\i}z-Fern{\'a}ndez, Valle",
editor = "Calzolari, Nicoletta and
Kan, Min-Yen and
Hoste, Veronique and
Lenci, Alessandro and
Sakti, Sakriani and
Xue, Nianwen",
booktitle = "Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)",
month = may,
year = "2024",
address = "Torino, Italia",
publisher = "ELRA and ICCL",
url = "https://aclanthology.org/2024.lrec-main.554",
pages = "6268--6277",
abstract = "Acceptability is one of the General Language Understanding Evaluation Benchmark (GLUE) probing tasks proposed to assess the linguistic capabilities acquired by a deep-learning transformer-based language model (LM). In this paper, we introduce the Spanish Corpus of Linguistic Acceptability EsCoLA. EsCoLA has been developed following the example of other linguistic acceptability data sets for English, Italian, Norwegian or Russian, with the aim of having a complete GLUE benchmark for Spanish. EsCoLA consists of 11,174 sentences and their acceptability judgements as found in well-known Spanish reference grammars. Additionally, all sentences have been annotated with the class of linguistic phenomenon the sentence is an example of, also following previous practices. We also provide as task baselines the results of fine-tuning four different language models with this data set and the results of a human annotation experiment. Results are also analyzed and commented to guide future research. EsCoLA is released under a CC-BY 4.0 license and freely available at https://doi.org/10.34810/data1138.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="bel-etal-2024-escola-spanish">
<titleInfo>
<title>EsCoLA: Spanish Corpus of Linguistic Acceptability</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nuria</namePart>
<namePart type="family">Bel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marta</namePart>
<namePart type="family">Punsola</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Valle</namePart>
<namePart type="family">Ruíz-Fernández</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Min-Yen</namePart>
<namePart type="family">Kan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Veronique</namePart>
<namePart type="family">Hoste</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alessandro</namePart>
<namePart type="family">Lenci</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sakriani</namePart>
<namePart type="family">Sakti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nianwen</namePart>
<namePart type="family">Xue</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>ELRA and ICCL</publisher>
<place>
<placeTerm type="text">Torino, Italia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Acceptability is one of the General Language Understanding Evaluation Benchmark (GLUE) probing tasks proposed to assess the linguistic capabilities acquired by a deep-learning transformer-based language model (LM). In this paper, we introduce the Spanish Corpus of Linguistic Acceptability EsCoLA. EsCoLA has been developed following the example of other linguistic acceptability data sets for English, Italian, Norwegian or Russian, with the aim of having a complete GLUE benchmark for Spanish. EsCoLA consists of 11,174 sentences and their acceptability judgements as found in well-known Spanish reference grammars. Additionally, all sentences have been annotated with the class of linguistic phenomenon the sentence is an example of, also following previous practices. We also provide as task baselines the results of fine-tuning four different language models with this data set and the results of a human annotation experiment. Results are also analyzed and commented to guide future research. EsCoLA is released under a CC-BY 4.0 license and freely available at https://doi.org/10.34810/data1138.</abstract>
<identifier type="citekey">bel-etal-2024-escola-spanish</identifier>
<location>
<url>https://aclanthology.org/2024.lrec-main.554</url>
</location>
<part>
<date>2024-05</date>
<extent unit="page">
<start>6268</start>
<end>6277</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T EsCoLA: Spanish Corpus of Linguistic Acceptability
%A Bel, Nuria
%A Punsola, Marta
%A Ruíz-Fernández, Valle
%Y Calzolari, Nicoletta
%Y Kan, Min-Yen
%Y Hoste, Veronique
%Y Lenci, Alessandro
%Y Sakti, Sakriani
%Y Xue, Nianwen
%S Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)
%D 2024
%8 May
%I ELRA and ICCL
%C Torino, Italia
%F bel-etal-2024-escola-spanish
%X Acceptability is one of the General Language Understanding Evaluation Benchmark (GLUE) probing tasks proposed to assess the linguistic capabilities acquired by a deep-learning transformer-based language model (LM). In this paper, we introduce the Spanish Corpus of Linguistic Acceptability EsCoLA. EsCoLA has been developed following the example of other linguistic acceptability data sets for English, Italian, Norwegian or Russian, with the aim of having a complete GLUE benchmark for Spanish. EsCoLA consists of 11,174 sentences and their acceptability judgements as found in well-known Spanish reference grammars. Additionally, all sentences have been annotated with the class of linguistic phenomenon the sentence is an example of, also following previous practices. We also provide as task baselines the results of fine-tuning four different language models with this data set and the results of a human annotation experiment. Results are also analyzed and commented to guide future research. EsCoLA is released under a CC-BY 4.0 license and freely available at https://doi.org/10.34810/data1138.
%U https://aclanthology.org/2024.lrec-main.554
%P 6268-6277
Markdown (Informal)
[EsCoLA: Spanish Corpus of Linguistic Acceptability](https://aclanthology.org/2024.lrec-main.554) (Bel et al., LREC-COLING 2024)
ACL
- Nuria Bel, Marta Punsola, and Valle Ruíz-Fernández. 2024. EsCoLA: Spanish Corpus of Linguistic Acceptability. In Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024), pages 6268–6277, Torino, Italia. ELRA and ICCL.