% LREC 2010 conference paper presenting the Sense Folder Corpus.
% Proper nouns in the title are brace-protected so that sentence-casing
% bibliography styles keep their capital letters.
@inproceedings{de-luca-2010-corpus,
    title     = {A Corpus for Evaluating Semantic Multilingual {Web} Retrieval Systems: The {Sense} {Folder} {Corpus}},
    author    = {De Luca, Ernesto William},
    editor    = {Calzolari, Nicoletta and
                 Choukri, Khalid and
                 Maegaard, Bente and
                 Mariani, Joseph and
                 Odijk, Jan and
                 Piperidis, Stelios and
                 Rosner, Mike and
                 Tapias, Daniel},
    booktitle = {Proceedings of the Seventh International Conference on Language Resources and Evaluation ({LREC}'10)},
    month     = may,
    year      = {2010},
    address   = {Valletta, Malta},
    publisher = {European Language Resources Association (ELRA)},
    url       = {http://www.lrec-conf.org/proceedings/lrec2010/pdf/816_Paper.pdf},
    abstract  = {In this paper, we present the multilingual Sense Folder Corpus. After the analysis of different corpora, we describe the requirements that have to be satisfied for evaluating semantic multilingual retrieval approaches. Justified by the unfulfilled requirements explained, we start creating a small bilingual hand-tagged corpus of 502 documents retrieved from Web searches. The documents contained in this collection have been created using Google queries. A single ambiguous word has been searched and related documents (approx. the first 60 documents for every keyword) have been retrieved. The document collection has been extended at the query word level, using single ambiguous words for English (argument, bank, chair, network and rule) and for Italian (argomento, lingua, regola, rete and stampa). The search and annotation process has been done both in a monolingual way for the English and the Italian language. 252 English and 250 Italian documents have been retrieved from Google and saved in their original rank. The performance of semantic multilingual retrieval systems has been evaluated using such a corpus with three baselines (Random, First Sense and Most Frequent Sense) that are formally presented and discussed. The fine-grained evaluation of the Sense Folder approach is discussed in details.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- MODS record for the paper with citekey de-luca-2010-corpus. -->
  <mods ID="de-luca-2010-corpus">
    <titleInfo>
      <title>A Corpus for Evaluating Semantic Multilingual Web Retrieval Systems: The Sense Folder Corpus</title>
    </titleInfo>
    <!-- Author of the paper. -->
    <name type="personal">
      <namePart type="given">Ernesto</namePart>
      <namePart type="given">William</namePart>
      <namePart type="family">De Luca</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2010-05</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors and publisher. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Seventh International Conference on Language Resources and Evaluation (LREC’10)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Nicoletta</namePart>
        <namePart type="family">Calzolari</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Khalid</namePart>
        <namePart type="family">Choukri</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Bente</namePart>
        <namePart type="family">Maegaard</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Joseph</namePart>
        <namePart type="family">Mariani</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Jan</namePart>
        <namePart type="family">Odijk</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Stelios</namePart>
        <namePart type="family">Piperidis</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Mike</namePart>
        <namePart type="family">Rosner</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Daniel</namePart>
        <namePart type="family">Tapias</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>European Language Resources Association (ELRA)</publisher>
        <place>
          <placeTerm type="text">Valletta, Malta</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>In this paper, we present the multilingual Sense Folder Corpus. After the analysis of different corpora, we describe the requirements that have to be satisfied for evaluating semantic multilingual retrieval approaches. Justified by the unfulfilled requirements explained, we start creating a small bilingual hand-tagged corpus of 502 documents retrieved from Web searches. The documents contained in this collection have been created using Google queries. A single ambiguous word has been searched and related documents (approx. the first 60 documents for every keyword) have been retrieved. The document collection has been extended at the query word level, using single ambiguous words for English (argument, bank, chair, network and rule) and for Italian (argomento, lingua, regola, rete and stampa). The search and annotation process has been done both in a monolingual way for the English and the Italian language. 252 English and 250 Italian documents have been retrieved from Google and saved in their original rank. The performance of semantic multilingual retrieval systems has been evaluated using such a corpus with three baselines (Random, First Sense and Most Frequent Sense) that are formally presented and discussed. The fine-grained evaluation of the Sense Folder approach is discussed in details.</abstract>
    <identifier type="citekey">de-luca-2010-corpus</identifier>
    <location>
      <url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/816_Paper.pdf</url>
    </location>
    <part>
      <date>2010-05</date>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T A Corpus for Evaluating Semantic Multilingual Web Retrieval Systems: The Sense Folder Corpus
%A De Luca, Ernesto William
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Odijk, Jan
%Y Piperidis, Stelios
%Y Rosner, Mike
%Y Tapias, Daniel
%S Proceedings of the Seventh International Conference on Language Resources and Evaluation (LREC’10)
%D 2010
%8 May
%I European Language Resources Association (ELRA)
%C Valletta, Malta
%F de-luca-2010-corpus
%X In this paper, we present the multilingual Sense Folder Corpus. After the analysis of different corpora, we describe the requirements that have to be satisfied for evaluating semantic multilingual retrieval approaches. Justified by the unfulfilled requirements explained, we start creating a small bilingual hand-tagged corpus of 502 documents retrieved from Web searches. The documents contained in this collection have been created using Google queries. A single ambiguous word has been searched and related documents (approx. the first 60 documents for every keyword) have been retrieved. The document collection has been extended at the query word level, using single ambiguous words for English (argument, bank, chair, network and rule) and for Italian (argomento, lingua, regola, rete and stampa). The search and annotation process has been done both in a monolingual way for the English and the Italian language. 252 English and 250 Italian documents have been retrieved from Google and saved in their original rank. The performance of semantic multilingual retrieval systems has been evaluated using such a corpus with three baselines (Random, First Sense and Most Frequent Sense) that are formally presented and discussed. The fine-grained evaluation of the Sense Folder approach is discussed in details.
%U http://www.lrec-conf.org/proceedings/lrec2010/pdf/816_Paper.pdf
Markdown (Informal)
[A Corpus for Evaluating Semantic Multilingual Web Retrieval Systems: The Sense Folder Corpus](http://www.lrec-conf.org/proceedings/lrec2010/pdf/816_Paper.pdf) (De Luca, LREC 2010)
ACL