@inproceedings{sarveswaran-etal-2019-using,
title = "Using Meta-Morph Rules to develop Morphological Analysers: A case study concerning {T}amil",
author = "Sarveswaran, Kengatharaiyer and
Dias, Gihan and
Butt, Miriam",
editor = "Vogler, Heiko and
Maletti, Andreas",
booktitle = "Proceedings of the 14th International Conference on Finite-State Methods and Natural Language Processing",
month = sep,
year = "2019",
address = "Dresden, Germany",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W19-3111",
doi = "10.18653/v1/W19-3111",
pages = "76--86",
abstract = "This paper describes a new and larger coverage Finite-State Morphological Analyser (FSM) and Generator for the Dravidian language Tamil. The FSM has been developed in the context of computational grammar engineering, adhering to the standards of the ParGram effort. Tamil is a morphologically rich language and the interaction between linguistic analysis and formal implementation is complex, resulting in a challenging task. In order to allow the development of the FSM to focus more on the linguistic analysis and less on the formal details, we have developed a system of meta-morph(ology) rules along with a script which translates these rules into FSM processable representations. The introduction of meta-morph rules makes it possible for computationally naive linguists to interact with the system and to expand it in future work. We found that the meta-morph rules help to express linguistic generalisations and reduce the manual effort of writing lexical classes for morphological analysis. Our Tamil FSM currently handles mainly the inflectional morphology of 3,300 verb roots and their 260 forms. Further, it also has a lexicon of approximately 100,000 nouns along with a guesser to handle out-of-vocabulary items. Although the Tamil FSM was primarily developed to be part of a computational grammar, it can also be used as a web or stand-alone application for other NLP tasks, as per general ParGram practice.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="sarveswaran-etal-2019-using">
<titleInfo>
<title>Using Meta-Morph Rules to develop Morphological Analysers: A case study concerning Tamil</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kengatharaiyer</namePart>
<namePart type="family">Sarveswaran</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gihan</namePart>
<namePart type="family">Dias</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Miriam</namePart>
<namePart type="family">Butt</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 14th International Conference on Finite-State Methods and Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Heiko</namePart>
<namePart type="family">Vogler</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andreas</namePart>
<namePart type="family">Maletti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Dresden, Germany</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper describes a new and larger coverage Finite-State Morphological Analyser (FSM) and Generator for the Dravidian language Tamil. The FSM has been developed in the context of computational grammar engineering, adhering to the standards of the ParGram effort. Tamil is a morphologically rich language and the interaction between linguistic analysis and formal implementation is complex, resulting in a challenging task. In order to allow the development of the FSM to focus more on the linguistic analysis and less on the formal details, we have developed a system of meta-morph(ology) rules along with a script which translates these rules into FSM processable representations. The introduction of meta-morph rules makes it possible for computationally naive linguists to interact with the system and to expand it in future work. We found that the meta-morph rules help to express linguistic generalisations and reduce the manual effort of writing lexical classes for morphological analysis. Our Tamil FSM currently handles mainly the inflectional morphology of 3,300 verb roots and their 260 forms. Further, it also has a lexicon of approximately 100,000 nouns along with a guesser to handle out-of-vocabulary items. Although the Tamil FSM was primarily developed to be part of a computational grammar, it can also be used as a web or stand-alone application for other NLP tasks, as per general ParGram practice.</abstract>
<identifier type="citekey">sarveswaran-etal-2019-using</identifier>
<identifier type="doi">10.18653/v1/W19-3111</identifier>
<location>
<url>https://aclanthology.org/W19-3111</url>
</location>
<part>
<date>2019-09</date>
<extent unit="page">
<start>76</start>
<end>86</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Using Meta-Morph Rules to develop Morphological Analysers: A case study concerning Tamil
%A Sarveswaran, Kengatharaiyer
%A Dias, Gihan
%A Butt, Miriam
%Y Vogler, Heiko
%Y Maletti, Andreas
%S Proceedings of the 14th International Conference on Finite-State Methods and Natural Language Processing
%D 2019
%8 September
%I Association for Computational Linguistics
%C Dresden, Germany
%F sarveswaran-etal-2019-using
%X This paper describes a new and larger coverage Finite-State Morphological Analyser (FSM) and Generator for the Dravidian language Tamil. The FSM has been developed in the context of computational grammar engineering, adhering to the standards of the ParGram effort. Tamil is a morphologically rich language and the interaction between linguistic analysis and formal implementation is complex, resulting in a challenging task. In order to allow the development of the FSM to focus more on the linguistic analysis and less on the formal details, we have developed a system of meta-morph(ology) rules along with a script which translates these rules into FSM processable representations. The introduction of meta-morph rules makes it possible for computationally naive linguists to interact with the system and to expand it in future work. We found that the meta-morph rules help to express linguistic generalisations and reduce the manual effort of writing lexical classes for morphological analysis. Our Tamil FSM currently handles mainly the inflectional morphology of 3,300 verb roots and their 260 forms. Further, it also has a lexicon of approximately 100,000 nouns along with a guesser to handle out-of-vocabulary items. Although the Tamil FSM was primarily developed to be part of a computational grammar, it can also be used as a web or stand-alone application for other NLP tasks, as per general ParGram practice.
%R 10.18653/v1/W19-3111
%U https://aclanthology.org/W19-3111
%U https://doi.org/10.18653/v1/W19-3111
%P 76-86
Markdown (Informal)
[Using Meta-Morph Rules to develop Morphological Analysers: A case study concerning Tamil](https://aclanthology.org/W19-3111) (Sarveswaran et al., FSMNLP 2019)
ACL