@comment{Conference paper in the LREC 2012 proceedings. In the title, Romanian is brace-protected as a whole word so that sentence-casing styles keep its capital; the LREC acronym in the booktitle is protected the same way.}
@inproceedings{dinu-etal-2012-romanian,
    title     = {The {Romanian} Neuter Examined Through A Two-Gender N-Gram Classification System},
    author    = {Dinu, Liviu P. and
                 Niculae, Vlad and
                 {\c{S}}ulea, Octavia-Maria},
    editor    = {Calzolari, Nicoletta and
                 Choukri, Khalid and
                 Declerck, Thierry and
                 Do{\u{g}}an, Mehmet U{\u{g}}ur and
                 Maegaard, Bente and
                 Mariani, Joseph and
                 Moreno, Asuncion and
                 Odijk, Jan and
                 Piperidis, Stelios},
    booktitle = {Proceedings of the Eighth International Conference on Language Resources and Evaluation ({LREC}'12)},
    month     = may,
    year      = {2012},
    address   = {Istanbul, Turkey},
    publisher = {European Language Resources Association (ELRA)},
    url       = {http://www.lrec-conf.org/proceedings/lrec2012/pdf/651_Paper.pdf},
    pages     = {907--910},
    abstract  = {Romanian has been traditionally seen as bearing three lexical genders: masculine, feminine and neuter, although it has always been known to have only two agreement patterns (for masculine and feminine). A recent analysis of the Romanian gender system described in (Bateman and Polinsky, 2010), based on older observations, argues that there are two lexically unspecified noun classes in the singular and two different ones in the plural and that what is generally called neuter in Romanian shares the class in the singular with masculines, and the class in the plural with feminines based not only on agreement features but also on form. Previous machine learning classifiers that have attempted to discriminate Romanian nouns according to gender have so far taken as input only the singular form, presupposing the traditional tripartite analysis. We propose a classifier based on two parallel support vector machines using n-gram features from the singular and from the plural which outperforms previous classifiers in its high ability to distinguish the neuter. The performance of our system suggests that the two-gender analysis of Romanian, on which it is based, is on the right track.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for citekey dinu-etal-2012-romanian: the paper's own metadata plus its host proceedings. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="dinu-etal-2012-romanian">
    <titleInfo>
      <title>The Romanian Neuter Examined Through A Two-Gender N-Gram Classification System</title>
    </titleInfo>
    <!-- Paper authors, in listed order -->
    <name type="personal">
      <namePart type="given">Liviu</namePart>
      <namePart type="given">P</namePart>
      <namePart type="family">Dinu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Vlad</namePart>
      <namePart type="family">Niculae</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Octavia-Maria</namePart>
      <namePart type="family">Şulea</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2012-05</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, its editors, publisher and place -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC’12)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Nicoletta</namePart>
        <namePart type="family">Calzolari</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Khalid</namePart>
        <namePart type="family">Choukri</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Thierry</namePart>
        <namePart type="family">Declerck</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Mehmet</namePart>
        <namePart type="given">Uğur</namePart>
        <namePart type="family">Doğan</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Bente</namePart>
        <namePart type="family">Maegaard</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Joseph</namePart>
        <namePart type="family">Mariani</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Asuncion</namePart>
        <namePart type="family">Moreno</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Jan</namePart>
        <namePart type="family">Odijk</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Stelios</namePart>
        <namePart type="family">Piperidis</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>European Language Resources Association (ELRA)</publisher>
        <place>
          <placeTerm type="text">Istanbul, Turkey</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Romanian has been traditionally seen as bearing three lexical genders: masculine, feminine and neuter, although it has always been known to have only two agreement patterns (for masculine and feminine). A recent analysis of the Romanian gender system described in (Bateman and Polinsky, 2010), based on older observations, argues that there are two lexically unspecified noun classes in the singular and two different ones in the plural and that what is generally called neuter in Romanian shares the class in the singular with masculines, and the class in the plural with feminines based not only on agreement features but also on form. Previous machine learning classifiers that have attempted to discriminate Romanian nouns according to gender have so far taken as input only the singular form, presupposing the traditional tripartite analysis. We propose a classifier based on two parallel support vector machines using n-gram features from the singular and from the plural which outperforms previous classifiers in its high ability to distinguish the neuter. The performance of our system suggests that the two-gender analysis of Romanian, on which it is based, is on the right track.</abstract>
    <identifier type="citekey">dinu-etal-2012-romanian</identifier>
    <location>
      <url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/651_Paper.pdf</url>
    </location>
    <!-- Issue date and page extent of the paper -->
    <part>
      <date>2012-05</date>
      <extent unit="page">
        <start>907</start>
        <end>910</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T The Romanian Neuter Examined Through A Two-Gender N-Gram Classification System
%A Dinu, Liviu P.
%A Niculae, Vlad
%A Şulea, Octavia-Maria
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Declerck, Thierry
%Y Doğan, Mehmet Uğur
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Moreno, Asuncion
%Y Odijk, Jan
%Y Piperidis, Stelios
%S Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC’12)
%D 2012
%8 May
%I European Language Resources Association (ELRA)
%C Istanbul, Turkey
%F dinu-etal-2012-romanian
%X Romanian has been traditionally seen as bearing three lexical genders: masculine, feminine and neuter, although it has always been known to have only two agreement patterns (for masculine and feminine). A recent analysis of the Romanian gender system described in (Bateman and Polinsky, 2010), based on older observations, argues that there are two lexically unspecified noun classes in the singular and two different ones in the plural and that what is generally called neuter in Romanian shares the class in the singular with masculines, and the class in the plural with feminines based not only on agreement features but also on form. Previous machine learning classifiers that have attempted to discriminate Romanian nouns according to gender have so far taken as input only the singular form, presupposing the traditional tripartite analysis. We propose a classifier based on two parallel support vector machines using n-gram features from the singular and from the plural which outperforms previous classifiers in its high ability to distinguish the neuter. The performance of our system suggests that the two-gender analysis of Romanian, on which it is based, is on the right track.
%U http://www.lrec-conf.org/proceedings/lrec2012/pdf/651_Paper.pdf
%P 907-910
Markdown (Informal)
[The Romanian Neuter Examined Through A Two-Gender N-Gram Classification System](http://www.lrec-conf.org/proceedings/lrec2012/pdf/651_Paper.pdf) (Dinu et al., LREC 2012)
ACL