@inproceedings{kindt-van-elverdinghe-2022-describing,
title = "Describing Language Variation in the Colophons of {A}rmenian Manuscripts",
author = "Kindt, Bastien and
Van Elverdinghe, Emmanuel",
editor = "Khurshudyan, Victoria and
Tomeh, Nadi and
Nouvel, Damien and
Donabedian, Anaid and
Vidal-Gorene, Chahan",
booktitle = "Proceedings of the Workshop on Processing Language Variation: Digital Armenian (DigitAm) within the 13th Language Resources and Evaluation Conference",
month = jun,
year = "2022",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://aclanthology.org/2022.digitam-1.4/",
pages = "20--27",
abstract = "The colophons of Armenian manuscripts constitute a large textual corpus spanning a millennium of written culture. These texts are highly diverse and rich in terms of linguistic variation. This poses a challenge to NLP tools, especially considering the fact that linguistic resources designed or suited for Armenian are still scarce. In this paper, we deal with a sub-corpus of colophons written to commemorate the rescue of a manuscript and dating from 1286 to ca. 1450, a thematic group distinguished by a particularly high concentration of words exhibiting linguistic variation. The text is processed (lemmatization, POS-tagging, and inflectional tagging) using the tools of the GREgORI Project and evaluated. Through a selection of examples, we show how variation is dealt with at each linguistic level (phonology, orthography, flexion, vocabulary, syntax). Complex variation, at the level of tokens or lemmata, is considered as well. The results of this work are used to enrich and refine the linguistic resources of the GREgORI project, which in turn benefits the processing of other texts."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kindt-van-elverdinghe-2022-describing">
<titleInfo>
<title>Describing Language Variation in the Colophons of Armenian Manuscripts</title>
</titleInfo>
<name type="personal">
<namePart type="given">Bastien</namePart>
<namePart type="family">Kindt</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Emmanuel</namePart>
<namePart type="family">Van Elverdinghe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Workshop on Processing Language Variation: Digital Armenian (DigitAm) within the 13th Language Resources and Evaluation Conference</title>
</titleInfo>
<name type="personal">
<namePart type="given">Victoria</namePart>
<namePart type="family">Khurshudyan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nadi</namePart>
<namePart type="family">Tomeh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Damien</namePart>
<namePart type="family">Nouvel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anaid</namePart>
<namePart type="family">Donabedian</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chahan</namePart>
<namePart type="family">Vidal-Gorene</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The colophons of Armenian manuscripts constitute a large textual corpus spanning a millennium of written culture. These texts are highly diverse and rich in terms of linguistic variation. This poses a challenge to NLP tools, especially considering the fact that linguistic resources designed or suited for Armenian are still scarce. In this paper, we deal with a sub-corpus of colophons written to commemorate the rescue of a manuscript and dating from 1286 to ca. 1450, a thematic group distinguished by a particularly high concentration of words exhibiting linguistic variation. The text is processed (lemmatization, POS-tagging, and inflectional tagging) using the tools of the GREgORI Project and evaluated. Through a selection of examples, we show how variation is dealt with at each linguistic level (phonology, orthography, flexion, vocabulary, syntax). Complex variation, at the level of tokens or lemmata, is considered as well. The results of this work are used to enrich and refine the linguistic resources of the GREgORI project, which in turn benefits the processing of other texts.</abstract>
<identifier type="citekey">kindt-van-elverdinghe-2022-describing</identifier>
<location>
<url>https://aclanthology.org/2022.digitam-1.4/</url>
</location>
<part>
<date>2022-06</date>
<extent unit="page">
<start>20</start>
<end>27</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Describing Language Variation in the Colophons of Armenian Manuscripts
%A Kindt, Bastien
%A Van Elverdinghe, Emmanuel
%Y Khurshudyan, Victoria
%Y Tomeh, Nadi
%Y Nouvel, Damien
%Y Donabedian, Anaid
%Y Vidal-Gorene, Chahan
%S Proceedings of the Workshop on Processing Language Variation: Digital Armenian (DigitAm) within the 13th Language Resources and Evaluation Conference
%D 2022
%8 June
%I European Language Resources Association
%C Marseille, France
%F kindt-van-elverdinghe-2022-describing
%X The colophons of Armenian manuscripts constitute a large textual corpus spanning a millennium of written culture. These texts are highly diverse and rich in terms of linguistic variation. This poses a challenge to NLP tools, especially considering the fact that linguistic resources designed or suited for Armenian are still scarce. In this paper, we deal with a sub-corpus of colophons written to commemorate the rescue of a manuscript and dating from 1286 to ca. 1450, a thematic group distinguished by a particularly high concentration of words exhibiting linguistic variation. The text is processed (lemmatization, POS-tagging, and inflectional tagging) using the tools of the GREgORI Project and evaluated. Through a selection of examples, we show how variation is dealt with at each linguistic level (phonology, orthography, flexion, vocabulary, syntax). Complex variation, at the level of tokens or lemmata, is considered as well. The results of this work are used to enrich and refine the linguistic resources of the GREgORI project, which in turn benefits the processing of other texts.
%U https://aclanthology.org/2022.digitam-1.4/
%P 20-27
Markdown (Informal)
[Describing Language Variation in the Colophons of Armenian Manuscripts](https://aclanthology.org/2022.digitam-1.4/) (Kindt & Van Elverdinghe, DigitAm 2022)
ACL