@inproceedings{otrusina-smrz-2010-new,
  title     = {A New Approach to Pseudoword Generation},
  author    = {Otrusina, Lubomir and
               Smrz, Pavel},
  editor    = {Calzolari, Nicoletta and
               Choukri, Khalid and
               Maegaard, Bente and
               Mariani, Joseph and
               Odijk, Jan and
               Piperidis, Stelios and
               Rosner, Mike and
               Tapias, Daniel},
  booktitle = {Proceedings of the Seventh International Conference on Language Resources and Evaluation ({LREC}'10)},
  month     = may,
  year      = {2010},
  address   = {Valletta, Malta},
  publisher = {European Language Resources Association (ELRA)},
  url       = {http://www.lrec-conf.org/proceedings/lrec2010/pdf/339_Paper.pdf},
  abstract  = {Sense-tagged corpora are used to evaluate word sense disambiguation (WSD) systems. Manual creation of such resources is often prohibitively expensive. That is why the concept of pseudowords - conflations of two or more unambiguous words - has been integrated into WSD evaluation experiments. This paper presents a new method of pseudoword generation which takes into account semantic-relatedness of the candidate words forming parts of the pseudowords to the particular senses of the word to be disambiguated. We compare the new approach to its alternatives and show that the results on pseudowords, that are more similar to real ambiguous words, better correspond to the actual results. Two techniques assessing the similarity are studied - the first one takes advantage of manually created dictionaries (wordnets), the second one builds on the automatically computed statistical data obtained from large corpora. Pros and cons of the two techniques are discussed and the results on a standard task are demonstrated.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS record for a single conference paper, wrapped in a one-item collection. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="otrusina-smrz-2010-new">
    <titleInfo>
      <title>A New Approach to Pseudoword Generation</title>
    </titleInfo>
    <!-- Authors of the paper, in byline order. -->
    <name type="personal">
      <namePart type="given">Lubomir</namePart>
      <namePart type="family">Otrusina</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Pavel</namePart>
      <namePart type="family">Smrz</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2010-05</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors, publisher and place. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Seventh International Conference on Language Resources and Evaluation (LREC’10)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Nicoletta</namePart>
        <namePart type="family">Calzolari</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Khalid</namePart>
        <namePart type="family">Choukri</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Bente</namePart>
        <namePart type="family">Maegaard</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Joseph</namePart>
        <namePart type="family">Mariani</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Jan</namePart>
        <namePart type="family">Odijk</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Stelios</namePart>
        <namePart type="family">Piperidis</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Mike</namePart>
        <namePart type="family">Rosner</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Daniel</namePart>
        <namePart type="family">Tapias</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>European Language Resources Association (ELRA)</publisher>
        <place>
          <placeTerm type="text">Valletta, Malta</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Sense-tagged corpora are used to evaluate word sense disambiguation (WSD) systems. Manual creation of such resources is often prohibitively expensive. That is why the concept of pseudowords - conflations of two or more unambiguous words - has been integrated into WSD evaluation experiments. This paper presents a new method of pseudoword generation which takes into account semantic-relatedness of the candidate words forming parts of the pseudowords to the particular senses of the word to be disambiguated. We compare the new approach to its alternatives and show that the results on pseudowords, that are more similar to real ambiguous words, better correspond to the actual results. Two techniques assessing the similarity are studied - the first one takes advantage of manually created dictionaries (wordnets), the second one builds on the automatically computed statistical data obtained from large corpora. Pros and cons of the two techniques are discussed and the results on a standard task are demonstrated.</abstract>
    <!-- The citekey identifier carries the same value as the mods ID attribute. -->
    <identifier type="citekey">otrusina-smrz-2010-new</identifier>
    <location>
      <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/339_Paper.pdf</url>
    </location>
    <part>
      <date>2010-05</date>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T A New Approach to Pseudoword Generation
%A Otrusina, Lubomir
%A Smrz, Pavel
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Odijk, Jan
%Y Piperidis, Stelios
%Y Rosner, Mike
%Y Tapias, Daniel
%S Proceedings of the Seventh International Conference on Language Resources and Evaluation (LREC’10)
%D 2010
%8 May
%I European Language Resources Association (ELRA)
%C Valletta, Malta
%F otrusina-smrz-2010-new
%X Sense-tagged corpora are used to evaluate word sense disambiguation (WSD) systems. Manual creation of such resources is often prohibitively expensive. That is why the concept of pseudowords - conflations of two or more unambiguous words - has been integrated into WSD evaluation experiments. This paper presents a new method of pseudoword generation which takes into account semantic-relatedness of the candidate words forming parts of the pseudowords to the particular senses of the word to be disambiguated. We compare the new approach to its alternatives and show that the results on pseudowords, that are more similar to real ambiguous words, better correspond to the actual results. Two techniques assessing the similarity are studied - the first one takes advantage of manually created dictionaries (wordnets), the second one builds on the automatically computed statistical data obtained from large corpora. Pros and cons of the two techniques are discussed and the results on a standard task are demonstrated.
%U http://www.lrec-conf.org/proceedings/lrec2010/pdf/339_Paper.pdf
Markdown (Informal)
[A New Approach to Pseudoword Generation](http://www.lrec-conf.org/proceedings/lrec2010/pdf/339_Paper.pdf) (Otrusina & Smrz, LREC 2010)
ACL
- Lubomir Otrusina and Pavel Smrz. 2010. A New Approach to Pseudoword Generation. In Proceedings of the Seventh International Conference on Language Resources and Evaluation (LREC'10), Valletta, Malta. European Language Resources Association (ELRA).