@inproceedings{luecking-etal-2024-dependencies,
title = "Dependencies over Times and Tools ({D}o{TT})",
author = "Luecking, Andy and
Abrami, Giuseppe and
Hammerla, Leon and
Rahn, Marc and
Baumartz, Daniel and
Eger, Steffen and
Mehler, Alexander",
editor = "Calzolari, Nicoletta and
Kan, Min-Yen and
Hoste, Veronique and
Lenci, Alessandro and
Sakti, Sakriani and
Xue, Nianwen",
booktitle = "Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)",
month = may,
year = "2024",
address = "Torino, Italia",
publisher = "ELRA and ICCL",
url = "https://aclanthology.org/2024.lrec-main.415",
pages = "4641--4653",
abstract = "Purpose: Based on the examples of English and German, we investigate to what extent parsers trained on modern variants of these languages can be transferred to older language levels without loss. Methods: We developed a treebank called DoTT (https://github.com/texttechnologylab/DoTT) which covers, roughly, the time period from 1800 until today, in conjunction with the further development of the annotation tool DependencyAnnotator. DoTT consists of a collection of diachronic corpora enriched with dependency annotations using 3 parsers, 6 pre-trained language models, 5 newly trained models for German, and two tag sets (TIGER and Universal Dependencies). To assess how the different parsers perform on texts from different time periods, we created a gold standard sample as a benchmark. Results: We found that the parsers/models perform quite well on modern texts (document-level LAS ranging from 82.89 to 88.54) and slightly worse on older texts, as expected (average document-level LAS 84.60 vs. 86.14), but not significantly. For German texts, the (German) TIGER scheme achieved slightly better results than UD. Conclusion: Overall, this result speaks for the transferability of parsers to past language levels, at least dating back until around 1800. This very transferability, it is however argued, means that studies of language change in the field of dependency syntax can draw on dependency distance but miss out on some grammatical phenomena.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="luecking-etal-2024-dependencies">
<titleInfo>
<title>Dependencies over Times and Tools (DoTT)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Andy</namePart>
<namePart type="family">Luecking</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Giuseppe</namePart>
<namePart type="family">Abrami</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Leon</namePart>
<namePart type="family">Hammerla</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marc</namePart>
<namePart type="family">Rahn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Baumartz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Steffen</namePart>
<namePart type="family">Eger</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alexander</namePart>
<namePart type="family">Mehler</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Min-Yen</namePart>
<namePart type="family">Kan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Veronique</namePart>
<namePart type="family">Hoste</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alessandro</namePart>
<namePart type="family">Lenci</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sakriani</namePart>
<namePart type="family">Sakti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nianwen</namePart>
<namePart type="family">Xue</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>ELRA and ICCL</publisher>
<place>
<placeTerm type="text">Torino, Italia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Purpose: Based on the examples of English and German, we investigate to what extent parsers trained on modern variants of these languages can be transferred to older language levels without loss. Methods: We developed a treebank called DoTT (https://github.com/texttechnologylab/DoTT) which covers, roughly, the time period from 1800 until today, in conjunction with the further development of the annotation tool DependencyAnnotator. DoTT consists of a collection of diachronic corpora enriched with dependency annotations using 3 parsers, 6 pre-trained language models, 5 newly trained models for German, and two tag sets (TIGER and Universal Dependencies). To assess how the different parsers perform on texts from different time periods, we created a gold standard sample as a benchmark. Results: We found that the parsers/models perform quite well on modern texts (document-level LAS ranging from 82.89 to 88.54) and slightly worse on older texts, as expected (average document-level LAS 84.60 vs. 86.14), but not significantly. For German texts, the (German) TIGER scheme achieved slightly better results than UD. Conclusion: Overall, this result speaks for the transferability of parsers to past language levels, at least dating back until around 1800. This very transferability, it is however argued, means that studies of language change in the field of dependency syntax can draw on dependency distance but miss out on some grammatical phenomena.</abstract>
<identifier type="citekey">luecking-etal-2024-dependencies</identifier>
<location>
<url>https://aclanthology.org/2024.lrec-main.415</url>
</location>
<part>
<date>2024-05</date>
<extent unit="page">
<start>4641</start>
<end>4653</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Dependencies over Times and Tools (DoTT)
%A Luecking, Andy
%A Abrami, Giuseppe
%A Hammerla, Leon
%A Rahn, Marc
%A Baumartz, Daniel
%A Eger, Steffen
%A Mehler, Alexander
%Y Calzolari, Nicoletta
%Y Kan, Min-Yen
%Y Hoste, Veronique
%Y Lenci, Alessandro
%Y Sakti, Sakriani
%Y Xue, Nianwen
%S Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)
%D 2024
%8 May
%I ELRA and ICCL
%C Torino, Italia
%F luecking-etal-2024-dependencies
%X Purpose: Based on the examples of English and German, we investigate to what extent parsers trained on modern variants of these languages can be transferred to older language levels without loss. Methods: We developed a treebank called DoTT (https://github.com/texttechnologylab/DoTT) which covers, roughly, the time period from 1800 until today, in conjunction with the further development of the annotation tool DependencyAnnotator. DoTT consists of a collection of diachronic corpora enriched with dependency annotations using 3 parsers, 6 pre-trained language models, 5 newly trained models for German, and two tag sets (TIGER and Universal Dependencies). To assess how the different parsers perform on texts from different time periods, we created a gold standard sample as a benchmark. Results: We found that the parsers/models perform quite well on modern texts (document-level LAS ranging from 82.89 to 88.54) and slightly worse on older texts, as expected (average document-level LAS 84.60 vs. 86.14), but not significantly. For German texts, the (German) TIGER scheme achieved slightly better results than UD. Conclusion: Overall, this result speaks for the transferability of parsers to past language levels, at least dating back until around 1800. This very transferability, it is however argued, means that studies of language change in the field of dependency syntax can draw on dependency distance but miss out on some grammatical phenomena.
%U https://aclanthology.org/2024.lrec-main.415
%P 4641-4653
Markdown (Informal)
[Dependencies over Times and Tools (DoTT)](https://aclanthology.org/2024.lrec-main.415) (Luecking et al., LREC-COLING 2024)
ACL
- Andy Luecking, Giuseppe Abrami, Leon Hammerla, Marc Rahn, Daniel Baumartz, Steffen Eger, and Alexander Mehler. 2024. Dependencies over Times and Tools (DoTT). In Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024), pages 4641–4653, Torino, Italia. ELRA and ICCL.