% Conference-paper record; proceedings-level data (editors, booktitle, publisher, isbn)
% is repeated inline rather than inherited through crossref.
% NOTE(review): the second author is stored as family "Rodo" with given names
% "Sergio Jose Salazar". If the surname is really the compound "Salazar Rodo",
% move "Salazar" in front of the comma. Confirm with the authors before changing.
@inproceedings{pagliai-etal-2026-spanish,
  title     = {The {Spanish} Learner and Heritage Speaker Dependency Treebank},
  author    = {Pagliai, Valeria and
               Rod{\'o}, Sergio Jos{\'e} Salazar and
               Pulido, Emiliana and
               Gutierrez-Quintero, Andres and
               Liu, Zoey},
  editor    = {Voigt, Rob and
               Warstadt, Alex and
               Feldman, Naomi and
               Linzen, Tal},
  booktitle = {Proceedings of the Society for Computation in Linguistics 2026},
  month     = jul,
  year      = {2026},
  address   = {San Diego, CA},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.scil-main.12/},
  pages     = {127--128},
  isbn      = {979-8-89176-412-5},
  abstract  = {We present a manually curated L2-Heritage Speaker Spanish dataset (N = 49,247) following the Universal Dependencies framework, including lemmatizations, part-of-speech tags, syntactic dependencies, and instances of pro-drop and ungrammatical structures. In addition to this, for dependency parsing we examined different data partitioning strategies and data representations, as well as different training configurations using our data and the AnCora treebank. Overall, the results yield reasonable LAS scores and comparable performance between AnCora and our dataset.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- One MODS record; its ID matches the citekey identifier further down. -->
  <mods ID="pagliai-etal-2026-spanish">
    <titleInfo>
      <title>The Spanish Learner and Heritage Speaker Dependency Treebank</title>
    </titleInfo>

    <!-- Authors, in publication order. -->
    <name type="personal">
      <namePart type="given">Valeria</namePart>
      <namePart type="family">Pagliai</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <!-- Each given name is carried in its own namePart element. -->
      <namePart type="given">Sergio</namePart>
      <namePart type="given">José</namePart>
      <namePart type="given">Salazar</namePart>
      <namePart type="family">Rodó</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Emiliana</namePart>
      <namePart type="family">Pulido</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Andres</namePart>
      <namePart type="family">Gutierrez-Quintero</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Zoey</namePart>
      <namePart type="family">Liu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>

    <originInfo>
      <dateIssued>2026-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>

    <!-- Host item: the proceedings volume, with its editors, publisher, place and ISBN. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Society for Computation in Linguistics 2026</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Rob</namePart>
        <namePart type="family">Voigt</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Alex</namePart>
        <namePart type="family">Warstadt</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Naomi</namePart>
        <namePart type="family">Feldman</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Tal</namePart>
        <namePart type="family">Linzen</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">San Diego, CA</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-412-5</identifier>
    </relatedItem>

    <abstract>We present a manually curated L2-Heritage Speaker Spanish dataset (N = 49,247) following the Universal Dependencies framework, including lemmatizations, part-of-speech tags, syntactic dependencies, and instances of pro-drop and ungrammatical structures. In addition to this, for dependency parsing we examined different data partitioning strategies and data representations, as well as different training configurations using our data and the AnCora treebank. Overall, the results yield reasonable LAS scores and comparable performance between AnCora and our dataset.</abstract>
    <identifier type="citekey">pagliai-etal-2026-spanish</identifier>
    <location>
      <url>https://aclanthology.org/2026.scil-main.12/</url>
    </location>

    <!-- Position of this paper inside the host volume. -->
    <part>
      <date>2026-07</date>
      <extent unit="page">
        <start>127</start>
        <end>128</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T The Spanish Learner and Heritage Speaker Dependency Treebank
%A Pagliai, Valeria
%A Rodó, Sergio José Salazar
%A Pulido, Emiliana
%A Gutierrez-Quintero, Andres
%A Liu, Zoey
%Y Voigt, Rob
%Y Warstadt, Alex
%Y Feldman, Naomi
%Y Linzen, Tal
%S Proceedings of the Society for Computation in Linguistics 2026
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, CA
%@ 979-8-89176-412-5
%F pagliai-etal-2026-spanish
%X We present a manually curated L2-Heritage Speaker Spanish dataset (N = 49,247) following the Universal Dependencies framework, including lemmatizations, part-of-speech tags, syntactic dependencies, and instances of pro-drop and ungrammatical structures. In addition to this, for dependency parsing we examined different data partitioning strategies and data representations, as well as different training configurations using our data and the AnCora treebank. Overall, the results yield reasonable LAS scores and comparable performance between AnCora and our dataset.
%U https://aclanthology.org/2026.scil-main.12/
%P 127-128
Markdown (Informal)
[The Spanish Learner and Heritage Speaker Dependency Treebank](https://aclanthology.org/2026.scil-main.12/) (Pagliai et al., SCiL 2026)
ACL
- Valeria Pagliai, Sergio José Salazar Rodó, Emiliana Pulido, Andres Gutierrez-Quintero, and Zoey Liu. 2026. The Spanish Learner and Heritage Speaker Dependency Treebank. In Proceedings of the Society for Computation in Linguistics 2026, pages 127–128, San Diego, CA. Association for Computational Linguistics.