@inproceedings{cramer-etal-2006-building,
title = "Building an Evaluation Corpus for {G}erman Question Answering by Harvesting {W}ikipedia",
author = "Cramer, Irene and
Leidner, Jochen L. and
Klakow, Dietrich",
editor = "Calzolari, Nicoletta and
Choukri, Khalid and
Gangemi, Aldo and
Maegaard, Bente and
Mariani, Joseph and
Odijk, Jan and
Tapias, Daniel",
booktitle = "Proceedings of the Fifth International Conference on Language Resources and Evaluation ({LREC}{'}06)",
month = may,
year = "2006",
address = "Genoa, Italy",
publisher = "European Language Resources Association (ELRA)",
url = "http://www.lrec-conf.org/proceedings/lrec2006/pdf/206_pdf.pdf",
abstract = "The growing interest in open-domain question answering is limited by the lack of evaluation and training resources. To overcome this resource bottleneck for German, we propose a novel methodology to acquire new question-answer pairs for system evaluation that relies on volunteer collaboration over the Internet. Utilizing Wikipedia, a popular free online encyclopedia available in several languages, we show that the data acquisition problem can be cast as a Web experiment. We present a Web-based annotation tool and carry out a distributed data collection experiment. The data gathered from the mostly anonymous contributors is compared to a similar dataset produced in-house by domain experts on the one hand, and the German questions from the CLEF QA 2004 effort on the other hand. Our analysis of the datasets suggests that using our novel method a medium-scale evaluation resource can be built at very small cost in a short period of time. The technique and software developed here is readily applicable to other languages where free online encyclopedias are available, and our resulting corpus is likewise freely available.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="cramer-etal-2006-building">
<titleInfo>
<title>Building an Evaluation Corpus for German Question Answering by Harvesting Wikipedia</title>
</titleInfo>
<name type="personal">
<namePart type="given">Irene</namePart>
<namePart type="family">Cramer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jochen</namePart>
<namePart type="given">L</namePart>
<namePart type="family">Leidner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dietrich</namePart>
<namePart type="family">Klakow</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2006-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fifth International Conference on Language Resources and Evaluation (LREC’06)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aldo</namePart>
<namePart type="family">Gangemi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Tapias</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Genoa, Italy</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The growing interest in open-domain question answering is limited by the lack of evaluation and training resources. To overcome this resource bottleneck for German, we propose a novel methodology to acquire new question-answer pairs for system evaluation that relies on volunteer collaboration over the Internet. Utilizing Wikipedia, a popular free online encyclopedia available in several languages, we show that the data acquisition problem can be cast as a Web experiment. We present a Web-based annotation tool and carry out a distributed data collection experiment. The data gathered from the mostly anonymous contributors is compared to a similar dataset produced in-house by domain experts on the one hand, and the German questions from the CLEF QA 2004 effort on the other hand. Our analysis of the datasets suggests that using our novel method a medium-scale evaluation resource can be built at very small cost in a short period of time. The technique and software developed here is readily applicable to other languages where free online encyclopedias are available, and our resulting corpus is likewise freely available.</abstract>
<identifier type="citekey">cramer-etal-2006-building</identifier>
<location>
<url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/206_pdf.pdf</url>
</location>
<part>
<date>2006-05</date>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Building an Evaluation Corpus for German Question Answering by Harvesting Wikipedia
%A Cramer, Irene
%A Leidner, Jochen L.
%A Klakow, Dietrich
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Gangemi, Aldo
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Odijk, Jan
%Y Tapias, Daniel
%S Proceedings of the Fifth International Conference on Language Resources and Evaluation (LREC’06)
%D 2006
%8 May
%I European Language Resources Association (ELRA)
%C Genoa, Italy
%F cramer-etal-2006-building
%X The growing interest in open-domain question answering is limited by the lack of evaluation and training resources. To overcome this resource bottleneck for German, we propose a novel methodology to acquire new question-answer pairs for system evaluation that relies on volunteer collaboration over the Internet. Utilizing Wikipedia, a popular free online encyclopedia available in several languages, we show that the data acquisition problem can be cast as a Web experiment. We present a Web-based annotation tool and carry out a distributed data collection experiment. The data gathered from the mostly anonymous contributors is compared to a similar dataset produced in-house by domain experts on the one hand, and the German questions from the CLEF QA 2004 effort on the other hand. Our analysis of the datasets suggests that using our novel method a medium-scale evaluation resource can be built at very small cost in a short period of time. The technique and software developed here is readily applicable to other languages where free online encyclopedias are available, and our resulting corpus is likewise freely available.
%U http://www.lrec-conf.org/proceedings/lrec2006/pdf/206_pdf.pdf
Markdown (Informal)
[Building an Evaluation Corpus for German Question Answering by Harvesting Wikipedia](http://www.lrec-conf.org/proceedings/lrec2006/pdf/206_pdf.pdf) (Cramer et al., LREC 2006)
ACL