@inproceedings{moeller-etal-2020-igt2p,
title = "{IGT}2{P}: From Interlinear Glossed Texts to Paradigms",
author = "Moeller, Sarah and
Liu, Ling and
Yang, Changbing and
Kann, Katharina and
Hulden, Mans",
editor = "Webber, Bonnie and
Cohn, Trevor and
He, Yulan and
Liu, Yang",
booktitle = "Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)",
month = nov,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.emnlp-main.424",
doi = "10.18653/v1/2020.emnlp-main.424",
pages = "5251--5262",
abstract = "An intermediate step in the linguistic analysis of an under-documented language is to find and organize inflected forms that are attested in natural speech. From this data, linguists generate unseen inflected word forms in order to test hypotheses about the language{'}s inflectional patterns and to complete inflectional paradigm tables. To get the data linguists spend many hours manually creating interlinear glossed texts (IGTs). We introduce a new task that speeds this process and automatically generates new morphological resources for natural language processing systems: IGT-to-paradigms (IGT2P). IGT2P generates entire morphological paradigms from IGT input. We show that existing morphological reinflection models can solve the task with 21{\%} to 64{\%} accuracy, depending on the language. We further find that (i) having a language expert spend only a few hours cleaning the noisy IGT data improves performance by as much as 21 percentage points, and (ii) POS tags, which are generally considered a necessary part of NLP morphological reinflection input, have no effect on the accuracy of the models considered here.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="moeller-etal-2020-igt2p">
<titleInfo>
<title>IGT2P: From Interlinear Glossed Texts to Paradigms</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sarah</namePart>
<namePart type="family">Moeller</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ling</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Changbing</namePart>
<namePart type="family">Yang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Katharina</namePart>
<namePart type="family">Kann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mans</namePart>
<namePart type="family">Hulden</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Bonnie</namePart>
<namePart type="family">Webber</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Trevor</namePart>
<namePart type="family">Cohn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yulan</namePart>
<namePart type="family">He</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yang</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>An intermediate step in the linguistic analysis of an under-documented language is to find and organize inflected forms that are attested in natural speech. From this data, linguists generate unseen inflected word forms in order to test hypotheses about the language’s inflectional patterns and to complete inflectional paradigm tables. To get the data linguists spend many hours manually creating interlinear glossed texts (IGTs). We introduce a new task that speeds this process and automatically generates new morphological resources for natural language processing systems: IGT-to-paradigms (IGT2P). IGT2P generates entire morphological paradigms from IGT input. We show that existing morphological reinflection models can solve the task with 21% to 64% accuracy, depending on the language. We further find that (i) having a language expert spend only a few hours cleaning the noisy IGT data improves performance by as much as 21 percentage points, and (ii) POS tags, which are generally considered a necessary part of NLP morphological reinflection input, have no effect on the accuracy of the models considered here.</abstract>
<identifier type="citekey">moeller-etal-2020-igt2p</identifier>
<identifier type="doi">10.18653/v1/2020.emnlp-main.424</identifier>
<location>
<url>https://aclanthology.org/2020.emnlp-main.424</url>
</location>
<part>
<date>2020-11</date>
<extent unit="page">
<start>5251</start>
<end>5262</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T IGT2P: From Interlinear Glossed Texts to Paradigms
%A Moeller, Sarah
%A Liu, Ling
%A Yang, Changbing
%A Kann, Katharina
%A Hulden, Mans
%Y Webber, Bonnie
%Y Cohn, Trevor
%Y He, Yulan
%Y Liu, Yang
%S Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)
%D 2020
%8 November
%I Association for Computational Linguistics
%C Online
%F moeller-etal-2020-igt2p
%X An intermediate step in the linguistic analysis of an under-documented language is to find and organize inflected forms that are attested in natural speech. From this data, linguists generate unseen inflected word forms in order to test hypotheses about the language’s inflectional patterns and to complete inflectional paradigm tables. To get the data linguists spend many hours manually creating interlinear glossed texts (IGTs). We introduce a new task that speeds this process and automatically generates new morphological resources for natural language processing systems: IGT-to-paradigms (IGT2P). IGT2P generates entire morphological paradigms from IGT input. We show that existing morphological reinflection models can solve the task with 21% to 64% accuracy, depending on the language. We further find that (i) having a language expert spend only a few hours cleaning the noisy IGT data improves performance by as much as 21 percentage points, and (ii) POS tags, which are generally considered a necessary part of NLP morphological reinflection input, have no effect on the accuracy of the models considered here.
%R 10.18653/v1/2020.emnlp-main.424
%U https://aclanthology.org/2020.emnlp-main.424
%U https://doi.org/10.18653/v1/2020.emnlp-main.424
%P 5251-5262
Markdown (Informal)
[IGT2P: From Interlinear Glossed Texts to Paradigms](https://aclanthology.org/2020.emnlp-main.424) (Moeller et al., EMNLP 2020)
ACL
- Sarah Moeller, Ling Liu, Changbing Yang, Katharina Kann, and Mans Hulden. 2020. IGT2P: From Interlinear Glossed Texts to Paradigms. In Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP), pages 5251–5262, Online. Association for Computational Linguistics.