@inproceedings{wilkens-etal-2016-b2sg,
title = "{B}2{SG}: a {TOEFL}-like Task for {P}ortuguese",
author = "Wilkens, Rodrigo and
Zilio, Leonardo and
Ferreira, Eduardo and
Villavicencio, Aline",
editor = "Calzolari, Nicoletta and
Choukri, Khalid and
Declerck, Thierry and
Goggi, Sara and
Grobelnik, Marko and
Maegaard, Bente and
Mariani, Joseph and
Mazo, Helene and
Moreno, Asuncion and
Odijk, Jan and
Piperidis, Stelios",
booktitle = "Proceedings of the Tenth International Conference on Language Resources and Evaluation ({LREC}'16)",
month = may,
year = "2016",
address = "Portoro{\v{z}}, Slovenia",
publisher = "European Language Resources Association (ELRA)",
url = "https://aclanthology.org/L16-1580",
pages = "3659--3662",
abstract = "Resources such as WordNet are useful for NLP applications, but their manual construction consumes time and personnel, and frequently results in low coverage. One alternative is the automatic construction of large resources from corpora like distributional thesauri, containing semantically associated words. However, as they may contain noise, there is a strong need for automatic ways of evaluating the quality of the resulting resource. This paper introduces a gold standard that can aid in this task. The BabelNet-Based Semantic Gold Standard (B2SG) was automatically constructed based on BabelNet and partly evaluated by human judges. It consists of sets of tests that present one target word, one related word and three unrelated words. B2SG contains 2,875 validated relations: 800 for verbs and 2,075 for nouns; these relations are divided among synonymy, antonymy and hypernymy. They can be used as the basis for evaluating the accuracy of the similarity relations on distributional thesauri by comparing the proximity of the target word with the related and unrelated options and observing if the related word has the highest similarity value among them. As a case study two distributional thesauri were also developed: one using surface forms from a large (1.5 billion word) corpus and the other using lemmatized forms from a smaller (409 million word) corpus. Both distributional thesauri were then evaluated against B2SG, and the one using lemmatized forms performed slightly better.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="wilkens-etal-2016-b2sg">
<titleInfo>
<title>B2SG: a TOEFL-like Task for Portuguese</title>
</titleInfo>
<name type="personal">
<namePart type="given">Rodrigo</namePart>
<namePart type="family">Wilkens</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Leonardo</namePart>
<namePart type="family">Zilio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eduardo</namePart>
<namePart type="family">Ferreira</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aline</namePart>
<namePart type="family">Villavicencio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2016-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC’16)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thierry</namePart>
<namePart type="family">Declerck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Goggi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marko</namePart>
<namePart type="family">Grobelnik</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Helene</namePart>
<namePart type="family">Mazo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asuncion</namePart>
<namePart type="family">Moreno</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Portorož, Slovenia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Resources such as WordNet are useful for NLP applications, but their manual construction consumes time and personnel, and frequently results in low coverage. One alternative is the automatic construction of large resources from corpora like distributional thesauri, containing semantically associated words. However, as they may contain noise, there is a strong need for automatic ways of evaluating the quality of the resulting resource. This paper introduces a gold standard that can aid in this task. The BabelNet-Based Semantic Gold Standard (B2SG) was automatically constructed based on BabelNet and partly evaluated by human judges. It consists of sets of tests that present one target word, one related word and three unrelated words. B2SG contains 2,875 validated relations: 800 for verbs and 2,075 for nouns; these relations are divided among synonymy, antonymy and hypernymy. They can be used as the basis for evaluating the accuracy of the similarity relations on distributional thesauri by comparing the proximity of the target word with the related and unrelated options and observing if the related word has the highest similarity value among them. As a case study two distributional thesauri were also developed: one using surface forms from a large (1.5 billion word) corpus and the other using lemmatized forms from a smaller (409 million word) corpus. Both distributional thesauri were then evaluated against B2SG, and the one using lemmatized forms performed slightly better.</abstract>
<identifier type="citekey">wilkens-etal-2016-b2sg</identifier>
<location>
<url>https://aclanthology.org/L16-1580</url>
</location>
<part>
<date>2016-05</date>
<extent unit="page">
<start>3659</start>
<end>3662</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T B2SG: a TOEFL-like Task for Portuguese
%A Wilkens, Rodrigo
%A Zilio, Leonardo
%A Ferreira, Eduardo
%A Villavicencio, Aline
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Declerck, Thierry
%Y Goggi, Sara
%Y Grobelnik, Marko
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Mazo, Helene
%Y Moreno, Asuncion
%Y Odijk, Jan
%Y Piperidis, Stelios
%S Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC’16)
%D 2016
%8 May
%I European Language Resources Association (ELRA)
%C Portorož, Slovenia
%F wilkens-etal-2016-b2sg
%X Resources such as WordNet are useful for NLP applications, but their manual construction consumes time and personnel, and frequently results in low coverage. One alternative is the automatic construction of large resources from corpora like distributional thesauri, containing semantically associated words. However, as they may contain noise, there is a strong need for automatic ways of evaluating the quality of the resulting resource. This paper introduces a gold standard that can aid in this task. The BabelNet-Based Semantic Gold Standard (B2SG) was automatically constructed based on BabelNet and partly evaluated by human judges. It consists of sets of tests that present one target word, one related word and three unrelated words. B2SG contains 2,875 validated relations: 800 for verbs and 2,075 for nouns; these relations are divided among synonymy, antonymy and hypernymy. They can be used as the basis for evaluating the accuracy of the similarity relations on distributional thesauri by comparing the proximity of the target word with the related and unrelated options and observing if the related word has the highest similarity value among them. As a case study two distributional thesauri were also developed: one using surface forms from a large (1.5 billion word) corpus and the other using lemmatized forms from a smaller (409 million word) corpus. Both distributional thesauri were then evaluated against B2SG, and the one using lemmatized forms performed slightly better.
%U https://aclanthology.org/L16-1580
%P 3659-3662
Markdown (Informal)
[B2SG: a TOEFL-like Task for Portuguese](https://aclanthology.org/L16-1580) (Wilkens et al., LREC 2016)
ACL
- Rodrigo Wilkens, Leonardo Zilio, Eduardo Ferreira, and Aline Villavicencio. 2016. B2SG: a TOEFL-like Task for Portuguese. In Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC'16), pages 3659–3662, Portorož, Slovenia. European Language Resources Association (ELRA).