@inproceedings{sukhareva-etal-2017-distantly,
title = "Distantly Supervised {POS} Tagging of Low-Resource Languages under Extreme Data Sparsity: The Case of {H}ittite",
author = {Sukhareva, Maria and
Fuscagni, Francesco and
Daxenberger, Johannes and
G{\"o}rke, Susanne and
Prechel, Doris and
Gurevych, Iryna},
editor = "Alex, Beatrice and
Degaetano-Ortlieb, Stefania and
Feldman, Anna and
Kazantseva, Anna and
Reiter, Nils and
Szpakowicz, Stan",
booktitle = "Proceedings of the Joint {SIGHUM} Workshop on Computational Linguistics for Cultural Heritage, Social Sciences, Humanities and Literature",
month = aug,
year = "2017",
address = "Vancouver, Canada",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W17-2213",
doi = "10.18653/v1/W17-2213",
pages = "95--104",
abstract = "This paper presents a statistical approach to automatic morphosyntactic annotation of Hittite transcripts. Hittite is an extinct Indo-European language using the cuneiform script. There are currently no morphosyntactic annotations available for Hittite, so we explored methods of distant supervision. The annotations were projected from parallel German translations of the Hittite texts. In order to reduce data sparsity, we applied stemming of German and Hittite texts. As there is no off-the-shelf Hittite stemmer, a stemmer for Hittite was developed for this purpose. The resulting annotation projections were used to train a POS tagger, achieving an accuracy of 69{\%} on a test sample. To our knowledge, this is the first attempt of statistical POS tagging of a cuneiform language.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="sukhareva-etal-2017-distantly">
<titleInfo>
<title>Distantly Supervised POS Tagging of Low-Resource Languages under Extreme Data Sparsity: The Case of Hittite</title>
</titleInfo>
<name type="personal">
<namePart type="given">Maria</namePart>
<namePart type="family">Sukhareva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Francesco</namePart>
<namePart type="family">Fuscagni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Johannes</namePart>
<namePart type="family">Daxenberger</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Susanne</namePart>
<namePart type="family">Görke</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Doris</namePart>
<namePart type="family">Prechel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Iryna</namePart>
<namePart type="family">Gurevych</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2017-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Joint SIGHUM Workshop on Computational Linguistics for Cultural Heritage, Social Sciences, Humanities and Literature</title>
</titleInfo>
<name type="personal">
<namePart type="given">Beatrice</namePart>
<namePart type="family">Alex</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stefania</namePart>
<namePart type="family">Degaetano-Ortlieb</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">Feldman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">Kazantseva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nils</namePart>
<namePart type="family">Reiter</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stan</namePart>
<namePart type="family">Szpakowicz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vancouver, Canada</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper presents a statistical approach to automatic morphosyntactic annotation of Hittite transcripts. Hittite is an extinct Indo-European language using the cuneiform script. There are currently no morphosyntactic annotations available for Hittite, so we explored methods of distant supervision. The annotations were projected from parallel German translations of the Hittite texts. In order to reduce data sparsity, we applied stemming of German and Hittite texts. As there is no off-the-shelf Hittite stemmer, a stemmer for Hittite was developed for this purpose. The resulting annotation projections were used to train a POS tagger, achieving an accuracy of 69% on a test sample. To our knowledge, this is the first attempt of statistical POS tagging of a cuneiform language.</abstract>
<identifier type="citekey">sukhareva-etal-2017-distantly</identifier>
<identifier type="doi">10.18653/v1/W17-2213</identifier>
<location>
<url>https://aclanthology.org/W17-2213</url>
</location>
<part>
<date>2017-08</date>
<extent unit="page">
<start>95</start>
<end>104</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Distantly Supervised POS Tagging of Low-Resource Languages under Extreme Data Sparsity: The Case of Hittite
%A Sukhareva, Maria
%A Fuscagni, Francesco
%A Daxenberger, Johannes
%A Görke, Susanne
%A Prechel, Doris
%A Gurevych, Iryna
%Y Alex, Beatrice
%Y Degaetano-Ortlieb, Stefania
%Y Feldman, Anna
%Y Kazantseva, Anna
%Y Reiter, Nils
%Y Szpakowicz, Stan
%S Proceedings of the Joint SIGHUM Workshop on Computational Linguistics for Cultural Heritage, Social Sciences, Humanities and Literature
%D 2017
%8 August
%I Association for Computational Linguistics
%C Vancouver, Canada
%F sukhareva-etal-2017-distantly
%X This paper presents a statistical approach to automatic morphosyntactic annotation of Hittite transcripts. Hittite is an extinct Indo-European language using the cuneiform script. There are currently no morphosyntactic annotations available for Hittite, so we explored methods of distant supervision. The annotations were projected from parallel German translations of the Hittite texts. In order to reduce data sparsity, we applied stemming of German and Hittite texts. As there is no off-the-shelf Hittite stemmer, a stemmer for Hittite was developed for this purpose. The resulting annotation projections were used to train a POS tagger, achieving an accuracy of 69% on a test sample. To our knowledge, this is the first attempt of statistical POS tagging of a cuneiform language.
%R 10.18653/v1/W17-2213
%U https://aclanthology.org/W17-2213
%U https://doi.org/10.18653/v1/W17-2213
%P 95-104
Markdown (Informal)
[Distantly Supervised POS Tagging of Low-Resource Languages under Extreme Data Sparsity: The Case of Hittite](https://aclanthology.org/W17-2213) (Sukhareva et al., LaTeCH 2017)
ACL
- Maria Sukhareva, Francesco Fuscagni, Johannes Daxenberger, Susanne Görke, Doris Prechel, and Iryna Gurevych. 2017. Distantly Supervised POS Tagging of Low-Resource Languages under Extreme Data Sparsity: The Case of Hittite. In Proceedings of the Joint SIGHUM Workshop on Computational Linguistics for Cultural Heritage, Social Sciences, Humanities and Literature, pages 95–104, Vancouver, Canada. Association for Computational Linguistics.