@inproceedings{verkijk-etal-2024-studying,
title = "Studying Language Variation Considering the Re-Usability of Modern Theories, Tools and Resources for Annotating Explicit and Implicit Events in Centuries Old Text",
author = "Verkijk, Stella and
Sommerauer, Pia and
Vossen, Piek",
editor = {Scherrer, Yves and
Jauhiainen, Tommi and
Ljube{\v{s}}i{\'c}, Nikola and
Zampieri, Marcos and
Nakov, Preslav and
Tiedemann, J{\"o}rg},
booktitle = "Proceedings of the Eleventh Workshop on NLP for Similar Languages, Varieties, and Dialects (VarDial 2024)",
month = jun,
year = "2024",
address = "Mexico City, Mexico",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.vardial-1.15",
doi = "10.18653/v1/2024.vardial-1.15",
pages = "174--187",
abstract = "This paper discusses the re-usibility of existing approaches, tools and automatic techniques for the annotation and detection of events in a challenging variant of centuries old Dutch written in the archives of the Dutch East India Company. We describe our annotation process and provide a thorough analysis of different versions of manually annotated data and the first automatic results from two fine-tuned Language Models. Through the analysis of this complete process, the paper studies two things: to what extent we can use NLP theories and tasks formulated for modern English to formulate an annotation task for Early Modern Dutch and to what extent we can use NLP models and tools built for modern Dutch (and other languages) on Early Modern Dutch. We believe these analyses give us insight into how to deal with the large variation language showcases in describing events, and how this variation may differ accross domains. We release the annotation guidelines, annotated data, and code.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="verkijk-etal-2024-studying">
<titleInfo>
<title>Studying Language Variation Considering the Re-Usability of Modern Theories, Tools and Resources for Annotating Explicit and Implicit Events in Centuries Old Text</title>
</titleInfo>
<name type="personal">
<namePart type="given">Stella</namePart>
<namePart type="family">Verkijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pia</namePart>
<namePart type="family">Sommerauer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Piek</namePart>
<namePart type="family">Vossen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Eleventh Workshop on NLP for Similar Languages, Varieties, and Dialects (VarDial 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yves</namePart>
<namePart type="family">Scherrer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tommi</namePart>
<namePart type="family">Jauhiainen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nikola</namePart>
<namePart type="family">Ljubešić</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marcos</namePart>
<namePart type="family">Zampieri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Preslav</namePart>
<namePart type="family">Nakov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jörg</namePart>
<namePart type="family">Tiedemann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Mexico City, Mexico</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper discusses the re-usibility of existing approaches, tools and automatic techniques for the annotation and detection of events in a challenging variant of centuries old Dutch written in the archives of the Dutch East India Company. We describe our annotation process and provide a thorough analysis of different versions of manually annotated data and the first automatic results from two fine-tuned Language Models. Through the analysis of this complete process, the paper studies two things: to what extent we can use NLP theories and tasks formulated for modern English to formulate an annotation task for Early Modern Dutch and to what extent we can use NLP models and tools built for modern Dutch (and other languages) on Early Modern Dutch. We believe these analyses give us insight into how to deal with the large variation language showcases in describing events, and how this variation may differ accross domains. We release the annotation guidelines, annotated data, and code.</abstract>
<identifier type="citekey">verkijk-etal-2024-studying</identifier>
<identifier type="doi">10.18653/v1/2024.vardial-1.15</identifier>
<location>
<url>https://aclanthology.org/2024.vardial-1.15</url>
</location>
<part>
<date>2024-06</date>
<extent unit="page">
<start>174</start>
<end>187</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Studying Language Variation Considering the Re-Usability of Modern Theories, Tools and Resources for Annotating Explicit and Implicit Events in Centuries Old Text
%A Verkijk, Stella
%A Sommerauer, Pia
%A Vossen, Piek
%Y Scherrer, Yves
%Y Jauhiainen, Tommi
%Y Ljubešić, Nikola
%Y Zampieri, Marcos
%Y Nakov, Preslav
%Y Tiedemann, Jörg
%S Proceedings of the Eleventh Workshop on NLP for Similar Languages, Varieties, and Dialects (VarDial 2024)
%D 2024
%8 June
%I Association for Computational Linguistics
%C Mexico City, Mexico
%F verkijk-etal-2024-studying
%X This paper discusses the re-usibility of existing approaches, tools and automatic techniques for the annotation and detection of events in a challenging variant of centuries old Dutch written in the archives of the Dutch East India Company. We describe our annotation process and provide a thorough analysis of different versions of manually annotated data and the first automatic results from two fine-tuned Language Models. Through the analysis of this complete process, the paper studies two things: to what extent we can use NLP theories and tasks formulated for modern English to formulate an annotation task for Early Modern Dutch and to what extent we can use NLP models and tools built for modern Dutch (and other languages) on Early Modern Dutch. We believe these analyses give us insight into how to deal with the large variation language showcases in describing events, and how this variation may differ accross domains. We release the annotation guidelines, annotated data, and code.
%R 10.18653/v1/2024.vardial-1.15
%U https://aclanthology.org/2024.vardial-1.15
%U https://doi.org/10.18653/v1/2024.vardial-1.15
%P 174-187
Markdown (Informal)
[Studying Language Variation Considering the Re-Usability of Modern Theories, Tools and Resources for Annotating Explicit and Implicit Events in Centuries Old Text](https://aclanthology.org/2024.vardial-1.15) (Verkijk et al., VarDial-WS 2024)
ACL