@inproceedings{more-tsarfaty-2016-data,
title = "Data-Driven Morphological Analysis and Disambiguation for Morphologically Rich Languages and {U}niversal {D}ependencies",
author = "More, Amir and
Tsarfaty, Reut",
editor = "Matsumoto, Yuji and
Prasad, Rashmi",
booktitle = "Proceedings of {COLING} 2016, the 26th International Conference on Computational Linguistics: Technical Papers",
month = dec,
year = "2016",
address = "Osaka, Japan",
publisher = "The COLING 2016 Organizing Committee",
url = "https://aclanthology.org/C16-1033",
pages = "337--348",
abstract = "Parsing texts into universal dependencies (UD) in realistic scenarios requires infrastructure for the morphological analysis and disambiguation (MA{\&}D) of typologically different languages as a first tier. MA{\&}D is particularly challenging in morphologically rich languages (MRLs), where the ambiguous space-delimited tokens ought to be disambiguated with respect to their constituent morphemes, each morpheme carrying its own tag and a rich set features. Here we present a novel, language-agnostic, framework for MA{\&}D, based on a transition system with two variants {---} word-based and morpheme-based {---} and a dedicated transition to mitigate the biases of variable-length morpheme sequences. Our experiments on a Modern Hebrew case study show state of the art results, and we show that the morpheme-based MD consistently outperforms our word-based variant. We further illustrate the utility and multilingual coverage of our framework by morphologically analyzing and disambiguating the large set of languages in the UD treebanks.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="more-tsarfaty-2016-data">
<titleInfo>
<title>Data-Driven Morphological Analysis and Disambiguation for Morphologically Rich Languages and Universal Dependencies</title>
</titleInfo>
<name type="personal">
<namePart type="given">Amir</namePart>
<namePart type="family">More</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Reut</namePart>
<namePart type="family">Tsarfaty</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2016-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yuji</namePart>
<namePart type="family">Matsumoto</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rashmi</namePart>
<namePart type="family">Prasad</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>The COLING 2016 Organizing Committee</publisher>
<place>
<placeTerm type="text">Osaka, Japan</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Parsing texts into universal dependencies (UD) in realistic scenarios requires infrastructure for the morphological analysis and disambiguation (MA&D) of typologically different languages as a first tier. MA&D is particularly challenging in morphologically rich languages (MRLs), where the ambiguous space-delimited tokens ought to be disambiguated with respect to their constituent morphemes, each morpheme carrying its own tag and a rich set features. Here we present a novel, language-agnostic, framework for MA&D, based on a transition system with two variants — word-based and morpheme-based — and a dedicated transition to mitigate the biases of variable-length morpheme sequences. Our experiments on a Modern Hebrew case study show state of the art results, and we show that the morpheme-based MD consistently outperforms our word-based variant. We further illustrate the utility and multilingual coverage of our framework by morphologically analyzing and disambiguating the large set of languages in the UD treebanks.</abstract>
<identifier type="citekey">more-tsarfaty-2016-data</identifier>
<location>
<url>https://aclanthology.org/C16-1033</url>
</location>
<part>
<date>2016-12</date>
<extent unit="page">
<start>337</start>
<end>348</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Data-Driven Morphological Analysis and Disambiguation for Morphologically Rich Languages and Universal Dependencies
%A More, Amir
%A Tsarfaty, Reut
%Y Matsumoto, Yuji
%Y Prasad, Rashmi
%S Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers
%D 2016
%8 December
%I The COLING 2016 Organizing Committee
%C Osaka, Japan
%F more-tsarfaty-2016-data
%X Parsing texts into universal dependencies (UD) in realistic scenarios requires infrastructure for the morphological analysis and disambiguation (MA&D) of typologically different languages as a first tier. MA&D is particularly challenging in morphologically rich languages (MRLs), where the ambiguous space-delimited tokens ought to be disambiguated with respect to their constituent morphemes, each morpheme carrying its own tag and a rich set features. Here we present a novel, language-agnostic, framework for MA&D, based on a transition system with two variants — word-based and morpheme-based — and a dedicated transition to mitigate the biases of variable-length morpheme sequences. Our experiments on a Modern Hebrew case study show state of the art results, and we show that the morpheme-based MD consistently outperforms our word-based variant. We further illustrate the utility and multilingual coverage of our framework by morphologically analyzing and disambiguating the large set of languages in the UD treebanks.
%U https://aclanthology.org/C16-1033
%P 337-348
Markdown (Informal)
[Data-Driven Morphological Analysis and Disambiguation for Morphologically Rich Languages and Universal Dependencies](https://aclanthology.org/C16-1033) (More & Tsarfaty, COLING 2016)
ACL