% Conference paper from the RANLP 2019 Student Research Workshop proceedings
% (ACL Anthology record R19-2010). Proper nouns and acronyms in the title and
% booktitle are brace-protected as whole words so styles that apply sentence
% casing keep their capitalization.
@inproceedings{vukovic-etal-2019-corpora,
  title     = {Corpora and Processing Tools for Non-standard Contemporary and Diachronic {Balkan} {Slavic}},
  author    = {Vukovic, Teodora and
               Muheim, Nora and
               Winist{\"o}rfer, Olivier and
               {\v{S}}imko, Ivan and
               Makarova, Anastasia and
               Bradjan, Sanja},
  editor    = {Kovatchev, Venelin and
               Temnikova, Irina and
               {\v{S}}andrih, Branislava and
               Nikolova, Ivelina},
  booktitle = {Proceedings of the Student Research Workshop Associated with {RANLP} 2019},
  month     = sep,
  year      = {2019},
  address   = {Varna, Bulgaria},
  publisher = {INCOMA Ltd.},
  url       = {https://aclanthology.org/R19-2010},
  doi       = {10.26615/issn.2603-2821.2019_010},
  pages     = {62--68},
  abstract  = {The paper describes three corpora of different varieties of BS that are currently being developed with the goal of providing data for the analysis of the diatopic and diachronic variation in non-standard Balkan Slavic. The corpora include spoken materials from Torlak, Macedonian dialects, as well as the manuscripts of pre-standardized Bulgarian. Apart from the texts, tools for PoS annotation and lemmatization for all varieties are being created, as well as syntactic parsing for Torlak and Bulgarian varieties. The corpora are built using a unified methodology, relying on the best practices and state-of-the-art methods from the field. The uniform methodology allows the contrastive analysis of the data from different varieties. The corpora under construction can be considered a crucial contribution to the linguistic research on the languages in the Balkans as they provide the lacking data needed for the studies of linguistic variation in the Balkan Slavic, and enable the comparison of the said varieties with other neighbouring languages.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="vukovic-etal-2019-corpora">
<titleInfo>
<title>Corpora and Processing Tools for Non-standard Contemporary and Diachronic Balkan Slavic</title>
</titleInfo>
<name type="personal">
<namePart type="given">Teodora</namePart>
<namePart type="family">Vukovic</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nora</namePart>
<namePart type="family">Muheim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Olivier</namePart>
<namePart type="family">Winistörfer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ivan</namePart>
<namePart type="family">Šimko</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anastasia</namePart>
<namePart type="family">Makarova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sanja</namePart>
<namePart type="family">Bradjan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Student Research Workshop Associated with RANLP 2019</title>
</titleInfo>
<name type="personal">
<namePart type="given">Venelin</namePart>
<namePart type="family">Kovatchev</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Irina</namePart>
<namePart type="family">Temnikova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Branislava</namePart>
<namePart type="family">Šandrih</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ivelina</namePart>
<namePart type="family">Nikolova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>INCOMA Ltd.</publisher>
<place>
<placeTerm type="text">Varna, Bulgaria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The paper describes three corpora of different varieties of BS that are currently being developed with the goal of providing data for the analysis of the diatopic and diachronic variation in non-standard Balkan Slavic. The corpora include spoken materials from Torlak, Macedonian dialects, as well as the manuscripts of pre-standardized Bulgarian. Apart from the texts, tools for PoS annotation and lemmatization for all varieties are being created, as well as syntactic parsing for Torlak and Bulgarian varieties. The corpora are built using a unified methodology, relying on the best practices and state-of-the-art methods from the field. The uniform methodology allows the contrastive analysis of the data from different varieties. The corpora under construction can be considered a crucial contribution to the linguistic research on the languages in the Balkans as they provide the lacking data needed for the studies of linguistic variation in the Balkan Slavic, and enable the comparison of the said varieties with other neighbouring languages.</abstract>
<identifier type="citekey">vukovic-etal-2019-corpora</identifier>
<identifier type="doi">10.26615/issn.2603-2821.2019_010</identifier>
<location>
<url>https://aclanthology.org/R19-2010</url>
</location>
<part>
<date>2019-09</date>
<extent unit="page">
<start>62</start>
<end>68</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Corpora and Processing Tools for Non-standard Contemporary and Diachronic Balkan Slavic
%A Vukovic, Teodora
%A Muheim, Nora
%A Winistörfer, Olivier
%A Šimko, Ivan
%A Makarova, Anastasia
%A Bradjan, Sanja
%Y Kovatchev, Venelin
%Y Temnikova, Irina
%Y Šandrih, Branislava
%Y Nikolova, Ivelina
%S Proceedings of the Student Research Workshop Associated with RANLP 2019
%D 2019
%8 September
%I INCOMA Ltd.
%C Varna, Bulgaria
%F vukovic-etal-2019-corpora
%X The paper describes three corpora of different varieties of BS that are currently being developed with the goal of providing data for the analysis of the diatopic and diachronic variation in non-standard Balkan Slavic. The corpora include spoken materials from Torlak, Macedonian dialects, as well as the manuscripts of pre-standardized Bulgarian. Apart from the texts, tools for PoS annotation and lemmatization for all varieties are being created, as well as syntactic parsing for Torlak and Bulgarian varieties. The corpora are built using a unified methodology, relying on the best practices and state-of-the-art methods from the field. The uniform methodology allows the contrastive analysis of the data from different varieties. The corpora under construction can be considered a crucial contribution to the linguistic research on the languages in the Balkans as they provide the lacking data needed for the studies of linguistic variation in the Balkan Slavic, and enable the comparison of the said varieties with other neighbouring languages.
%R 10.26615/issn.2603-2821.2019_010
%U https://aclanthology.org/R19-2010
%U https://doi.org/10.26615/issn.2603-2821.2019_010
%P 62-68
Markdown (Informal)
[Corpora and Processing Tools for Non-standard Contemporary and Diachronic Balkan Slavic](https://aclanthology.org/R19-2010) (Vukovic et al., RANLP 2019)
ACL