@inproceedings{blancafort-2010-learning,
title = "Learning Morphology of {R}omance, {G}ermanic and {S}lavic Languages with the Tool Linguistica",
author = "Blancafort, Helena",
editor = "Calzolari, Nicoletta and
Choukri, Khalid and
Maegaard, Bente and
Mariani, Joseph and
Odijk, Jan and
Piperidis, Stelios and
Rosner, Mike and
Tapias, Daniel",
booktitle = "Proceedings of the Seventh International Conference on Language Resources and Evaluation ({LREC}'10)",
month = may,
year = "2010",
address = "Valletta, Malta",
publisher = "European Language Resources Association (ELRA)",
url = "http://www.lrec-conf.org/proceedings/lrec2010/pdf/872_Paper.pdf",
abstract = "In this paper we present preliminary work conducted on semi-automatic induction of inflectional paradigms from non annotated corpora using the open-source tool Linguistica (Goldsmith 2001) that can be utilized without any prior knowledge of the language. The aim is to induce morphology information from corpora such as to compare languages and foresee the difficulty to develop morphosyntactic lexica. We report on a series of corpus-based experiments run with Linguistica in Romance languages (Catalan, French, Italian, Portuguese, and Spanish), Germanic languages (Dutch, English and German), and Slavic language Polish. For each language we obtained interesting clusters of stems sharing the same suffixes. They can be seen as mini inflectional paradigms that include productive derivative suffixes. We ranked results depending on the size of the paradigms (maximum number of suffixes per stem) per language. Results show that it is useful to get a first idea of the role and complexity of inflection and derivation in a language, to compare results with other languages, and that it could be useful to build lexicographic resources from scratch. Still, special post-processing is needed to face the two principal drawbacks of the tool: no clear distinction between inflection and derivation, and not taking allomorphy into account.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="blancafort-2010-learning">
<titleInfo>
<title>Learning Morphology of Romance, Germanic and Slavic Languages with the Tool Linguistica</title>
</titleInfo>
<name type="personal">
<namePart type="given">Helena</namePart>
<namePart type="family">Blancafort</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2010-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Seventh International Conference on Language Resources and Evaluation (LREC’10)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mike</namePart>
<namePart type="family">Rosner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Tapias</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Valletta, Malta</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this paper we present preliminary work conducted on semi-automatic induction of inflectional paradigms from non annotated corpora using the open-source tool Linguistica (Goldsmith 2001) that can be utilized without any prior knowledge of the language. The aim is to induce morphology information from corpora such as to compare languages and foresee the difficulty to develop morphosyntactic lexica. We report on a series of corpus-based experiments run with Linguistica in Romance languages (Catalan, French, Italian, Portuguese, and Spanish), Germanic languages (Dutch, English and German), and Slavic language Polish. For each language we obtained interesting clusters of stems sharing the same suffixes. They can be seen as mini inflectional paradigms that include productive derivative suffixes. We ranked results depending on the size of the paradigms (maximum number of suffixes per stem) per language. Results show that it is useful to get a first idea of the role and complexity of inflection and derivation in a language, to compare results with other languages, and that it could be useful to build lexicographic resources from scratch. Still, special post-processing is needed to face the two principal drawbacks of the tool: no clear distinction between inflection and derivation, and not taking allomorphy into account.</abstract>
<identifier type="citekey">blancafort-2010-learning</identifier>
<location>
<url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/872_Paper.pdf</url>
</location>
<part>
<date>2010-05</date>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Learning Morphology of Romance, Germanic and Slavic Languages with the Tool Linguistica
%A Blancafort, Helena
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Odijk, Jan
%Y Piperidis, Stelios
%Y Rosner, Mike
%Y Tapias, Daniel
%S Proceedings of the Seventh International Conference on Language Resources and Evaluation (LREC’10)
%D 2010
%8 May
%I European Language Resources Association (ELRA)
%C Valletta, Malta
%F blancafort-2010-learning
%X In this paper we present preliminary work conducted on semi-automatic induction of inflectional paradigms from non annotated corpora using the open-source tool Linguistica (Goldsmith 2001) that can be utilized without any prior knowledge of the language. The aim is to induce morphology information from corpora such as to compare languages and foresee the difficulty to develop morphosyntactic lexica. We report on a series of corpus-based experiments run with Linguistica in Romance languages (Catalan, French, Italian, Portuguese, and Spanish), Germanic languages (Dutch, English and German), and Slavic language Polish. For each language we obtained interesting clusters of stems sharing the same suffixes. They can be seen as mini inflectional paradigms that include productive derivative suffixes. We ranked results depending on the size of the paradigms (maximum number of suffixes per stem) per language. Results show that it is useful to get a first idea of the role and complexity of inflection and derivation in a language, to compare results with other languages, and that it could be useful to build lexicographic resources from scratch. Still, special post-processing is needed to face the two principal drawbacks of the tool: no clear distinction between inflection and derivation, and not taking allomorphy into account.
%U http://www.lrec-conf.org/proceedings/lrec2010/pdf/872_Paper.pdf
Markdown (Informal)
[Learning Morphology of Romance, Germanic and Slavic Languages with the Tool Linguistica](http://www.lrec-conf.org/proceedings/lrec2010/pdf/872_Paper.pdf) (Blancafort, LREC 2010)
ACL