@inproceedings{kulick-bies-2016-rapid,
title = "Rapid Development of Morphological Analyzers for Typologically Diverse Languages",
author = "Kulick, Seth and
Bies, Ann",
editor = "Calzolari, Nicoletta and
Choukri, Khalid and
Declerck, Thierry and
Goggi, Sara and
Grobelnik, Marko and
Maegaard, Bente and
Mariani, Joseph and
Mazo, Helene and
Moreno, Asuncion and
Odijk, Jan and
Piperidis, Stelios",
booktitle = "Proceedings of the Tenth International Conference on Language Resources and Evaluation ({LREC}'16)",
month = may,
year = "2016",
address = "Portoro{\v{z}}, Slovenia",
publisher = "European Language Resources Association (ELRA)",
url = "https://aclanthology.org/L16-1405",
pages = "2551--2557",
abstract = "The Low Resource Language research conducted under DARPA{'}s Broad Operational Language Translation (BOLT) program required the rapid creation of text corpora of typologically diverse languages (Turkish, Hausa, and Uzbek) which were annotated with morphological information, along with other types of annotation. Since the output of morphological analyzers is a significant aid to morphological annotation, we developed a morphological analyzer for each language in order to support the annotation task, and also as a deliverable by itself. Our framework for analyzer creation results in tables similar to those used in the successful SAMA analyzer for Arabic, but with a more abstract linguistic level, from which the tables are derived. A lexicon was developed from available resources for integration with the analyzer, and given the speed of development and uncertain coverage of the lexicon, we assumed that the analyzer would necessarily be lacking in some coverage for the project annotation. Our analyzer framework was therefore focused on rapid implementation of the key structures of the language, together with accepting {``}wildcard{''} solutions as possible analyses for a word with an unknown stem, building upon our similar experiences with morphological annotation with Modern Standard Arabic and Egyptian Arabic.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kulick-bies-2016-rapid">
<titleInfo>
<title>Rapid Development of Morphological Analyzers for Typologically Diverse Languages</title>
</titleInfo>
<name type="personal">
<namePart type="given">Seth</namePart>
<namePart type="family">Kulick</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ann</namePart>
<namePart type="family">Bies</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2016-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC’16)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thierry</namePart>
<namePart type="family">Declerck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Goggi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marko</namePart>
<namePart type="family">Grobelnik</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Helene</namePart>
<namePart type="family">Mazo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asuncion</namePart>
<namePart type="family">Moreno</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Portorož, Slovenia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The Low Resource Language research conducted under DARPA’s Broad Operational Language Translation (BOLT) program required the rapid creation of text corpora of typologically diverse languages (Turkish, Hausa, and Uzbek) which were annotated with morphological information, along with other types of annotation. Since the output of morphological analyzers is a significant aid to morphological annotation, we developed a morphological analyzer for each language in order to support the annotation task, and also as a deliverable by itself. Our framework for analyzer creation results in tables similar to those used in the successful SAMA analyzer for Arabic, but with a more abstract linguistic level, from which the tables are derived. A lexicon was developed from available resources for integration with the analyzer, and given the speed of development and uncertain coverage of the lexicon, we assumed that the analyzer would necessarily be lacking in some coverage for the project annotation. Our analyzer framework was therefore focused on rapid implementation of the key structures of the language, together with accepting “wildcard” solutions as possible analyses for a word with an unknown stem, building upon our similar experiences with morphological annotation with Modern Standard Arabic and Egyptian Arabic.</abstract>
<identifier type="citekey">kulick-bies-2016-rapid</identifier>
<location>
<url>https://aclanthology.org/L16-1405</url>
</location>
<part>
<date>2016-05</date>
<extent unit="page">
<start>2551</start>
<end>2557</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Rapid Development of Morphological Analyzers for Typologically Diverse Languages
%A Kulick, Seth
%A Bies, Ann
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Declerck, Thierry
%Y Goggi, Sara
%Y Grobelnik, Marko
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Mazo, Helene
%Y Moreno, Asuncion
%Y Odijk, Jan
%Y Piperidis, Stelios
%S Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC’16)
%D 2016
%8 May
%I European Language Resources Association (ELRA)
%C Portorož, Slovenia
%F kulick-bies-2016-rapid
%X The Low Resource Language research conducted under DARPA’s Broad Operational Language Translation (BOLT) program required the rapid creation of text corpora of typologically diverse languages (Turkish, Hausa, and Uzbek) which were annotated with morphological information, along with other types of annotation. Since the output of morphological analyzers is a significant aid to morphological annotation, we developed a morphological analyzer for each language in order to support the annotation task, and also as a deliverable by itself. Our framework for analyzer creation results in tables similar to those used in the successful SAMA analyzer for Arabic, but with a more abstract linguistic level, from which the tables are derived. A lexicon was developed from available resources for integration with the analyzer, and given the speed of development and uncertain coverage of the lexicon, we assumed that the analyzer would necessarily be lacking in some coverage for the project annotation. Our analyzer framework was therefore focused on rapid implementation of the key structures of the language, together with accepting “wildcard” solutions as possible analyses for a word with an unknown stem, building upon our similar experiences with morphological annotation with Modern Standard Arabic and Egyptian Arabic.
%U https://aclanthology.org/L16-1405
%P 2551-2557
Markdown (Informal)
[Rapid Development of Morphological Analyzers for Typologically Diverse Languages](https://aclanthology.org/L16-1405) (Kulick & Bies, LREC 2016)
ACL