@inproceedings{esteve-dobrovoljc-2026-delta,
title = "{DELTA}: A Toolkit for Measuring Linguistic Diversity in Dependency-Parsed Corpora",
author = "Est{\`e}ve, Louis and
Dobrovoljc, Kaja",
editor = "Croce, Danilo and
Leidner, Jochen and
Moosavi, Nafise Sadat",
booktitle = "Proceedings of the 19th Conference of the {E}uropean Chapter of the {A}ssociation for {C}omputational {L}inguistics (Volume 3: System Demonstrations)",
month = mar,
year = "2026",
address = "Rabat, Morocco",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.eacl-demo.6/",
pages = "75--85",
ISBN = "979-8-89176-382-1",
abstract = "Despite growing interest in measuring linguistic diversity on the one hand and the increasing availability of cross-linguistically comparable parsed corpora on the other, tools for systematically measuring the diversity of specific linguistic phenomena on such data remain limited. To address this gap, we present DELTA, an open-source framework that integrates dependency tree querying with diversity computation, enabling systematic measurement across multiple linguistic levels (e.g., lexis, morphology, syntax) and multiple diversity dimensions (variety, balance, disparity). The pipeline processes CoNLL-U formatted corpora through configurable workflows, treating the format as a general-purpose tabular structure independent of specific annotation conventions. We validate DELTA on Parallel Universal Dependencies multilingual dataset, demonstrating its capacity for corpus profiling and cross-corpus diversity comparison."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="esteve-dobrovoljc-2026-delta">
<titleInfo>
<title>DELTA: A Toolkit for Measuring Linguistic Diversity in Dependency-Parsed Corpora</title>
</titleInfo>
<name type="personal">
<namePart type="given">Louis</namePart>
<namePart type="family">Estève</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kaja</namePart>
<namePart type="family">Dobrovoljc</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 19th Conference of the European Chapter of the Association for Computational Linguistics (Volume 3: System Demonstrations)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Danilo</namePart>
<namePart type="family">Croce</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jochen</namePart>
<namePart type="family">Leidner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nafise</namePart>
<namePart type="given">Sadat</namePart>
<namePart type="family">Moosavi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Rabat, Morocco</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-382-1</identifier>
</relatedItem>
<abstract>Despite growing interest in measuring linguistic diversity on the one hand and the increasing availability of cross-linguistically comparable parsed corpora on the other, tools for systematically measuring the diversity of specific linguistic phenomena on such data remain limited. To address this gap, we present DELTA, an open-source framework that integrates dependency tree querying with diversity computation, enabling systematic measurement across multiple linguistic levels (e.g., lexis, morphology, syntax) and multiple diversity dimensions (variety, balance, disparity). The pipeline processes CoNLL-U formatted corpora through configurable workflows, treating the format as a general-purpose tabular structure independent of specific annotation conventions. We validate DELTA on Parallel Universal Dependencies multilingual dataset, demonstrating its capacity for corpus profiling and cross-corpus diversity comparison.</abstract>
<identifier type="citekey">esteve-dobrovoljc-2026-delta</identifier>
<location>
<url>https://aclanthology.org/2026.eacl-demo.6/</url>
</location>
<part>
<date>2026-03</date>
<extent unit="page">
<start>75</start>
<end>85</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T DELTA: A Toolkit for Measuring Linguistic Diversity in Dependency-Parsed Corpora
%A Estève, Louis
%A Dobrovoljc, Kaja
%Y Croce, Danilo
%Y Leidner, Jochen
%Y Moosavi, Nafise Sadat
%S Proceedings of the 19th Conference of the European Chapter of the Association for Computational Linguistics (Volume 3: System Demonstrations)
%D 2026
%8 March
%I Association for Computational Linguistics
%C Rabat, Morocco
%@ 979-8-89176-382-1
%F esteve-dobrovoljc-2026-delta
%X Despite growing interest in measuring linguistic diversity on the one hand and the increasing availability of cross-linguistically comparable parsed corpora on the other, tools for systematically measuring the diversity of specific linguistic phenomena on such data remain limited. To address this gap, we present DELTA, an open-source framework that integrates dependency tree querying with diversity computation, enabling systematic measurement across multiple linguistic levels (e.g., lexis, morphology, syntax) and multiple diversity dimensions (variety, balance, disparity). The pipeline processes CoNLL-U formatted corpora through configurable workflows, treating the format as a general-purpose tabular structure independent of specific annotation conventions. We validate DELTA on Parallel Universal Dependencies multilingual dataset, demonstrating its capacity for corpus profiling and cross-corpus diversity comparison.
%U https://aclanthology.org/2026.eacl-demo.6/
%P 75-85
Markdown (Informal)
[DELTA: A Toolkit for Measuring Linguistic Diversity in Dependency-Parsed Corpora](https://aclanthology.org/2026.eacl-demo.6/) (Estève & Dobrovoljc, EACL 2026)
ACL