@inproceedings{babko-malaya-etal-2012-identifying,
title = "Identifying Nuggets of Information in {GALE} Distillation Evaluation",
author = "Babko-Malaya, Olga and
Milette, Greg and
Schneider, Michael and
Scogin, Sarah",
editor = "Calzolari, Nicoletta and
Choukri, Khalid and
Declerck, Thierry and
Do{\u{g}}an, Mehmet U{\u{g}}ur and
Maegaard, Bente and
Mariani, Joseph and
Moreno, Asuncion and
Odijk, Jan and
Piperidis, Stelios",
booktitle = "Proceedings of the Eighth International Conference on Language Resources and Evaluation ({LREC}'12)",
month = may,
year = "2012",
address = "Istanbul, Turkey",
publisher = "European Language Resources Association (ELRA)",
url = "http://www.lrec-conf.org/proceedings/lrec2012/pdf/482_Paper.pdf",
pages = "2322--2327",
abstract = "This paper describes an approach to automatic nuggetization and implemented system employed in GALE Distillation evaluation to measure the information content of text returned in response to an open-ended question. The system identifies nuggets, or atomic units of information, categorizes them according to their semantic type, and selects different types of nuggets depending on the type of the question. We further show how this approach addresses the main challenges for using automatic nuggetization for QA evaluation: the variability of relevant nuggets and their dependence on the question. Specifically, we propose a template-based approach to nuggetization, where different semantic categories of nuggets are extracted dependent on the template of a question. During evaluation, human annotators judge each snippet returned in response to a query as relevant or irrelevant, whereas automatic template-based nuggetization is further used to identify the semantic units of information that people would have selected as relevant' or irrelevant' nuggets for a given query. Finally, the paper presents the performance results of the nuggetization system which compare the number of automatically generated nuggets and human nuggets and show that our automatic nuggetization is consistent with human judgments.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="babko-malaya-etal-2012-identifying">
<titleInfo>
<title>Identifying Nuggets of Information in GALE Distillation Evaluation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Olga</namePart>
<namePart type="family">Babko-Malaya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Greg</namePart>
<namePart type="family">Milette</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Michael</namePart>
<namePart type="family">Schneider</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sarah</namePart>
<namePart type="family">Scogin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2012-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC’12)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thierry</namePart>
<namePart type="family">Declerck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mehmet</namePart>
<namePart type="given">Uğur</namePart>
<namePart type="family">Doğan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asuncion</namePart>
<namePart type="family">Moreno</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Istanbul, Turkey</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper describes an approach to automatic nuggetization and an implemented system employed in GALE Distillation evaluation to measure the information content of text returned in response to an open-ended question. The system identifies nuggets, or atomic units of information, categorizes them according to their semantic type, and selects different types of nuggets depending on the type of the question. We further show how this approach addresses the main challenges for using automatic nuggetization for QA evaluation: the variability of relevant nuggets and their dependence on the question. Specifically, we propose a template-based approach to nuggetization, where different semantic categories of nuggets are extracted depending on the template of a question. During evaluation, human annotators judge each snippet returned in response to a query as relevant or irrelevant, whereas automatic template-based nuggetization is further used to identify the semantic units of information that people would have selected as ‘relevant’ or ‘irrelevant’ nuggets for a given query. Finally, the paper presents the performance results of the nuggetization system, which compare the number of automatically generated nuggets and human nuggets and show that our automatic nuggetization is consistent with human judgments.</abstract>
<identifier type="citekey">babko-malaya-etal-2012-identifying</identifier>
<location>
<url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/482_Paper.pdf</url>
</location>
<part>
<date>2012-05</date>
<extent unit="page">
<start>2322</start>
<end>2327</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Identifying Nuggets of Information in GALE Distillation Evaluation
%A Babko-Malaya, Olga
%A Milette, Greg
%A Schneider, Michael
%A Scogin, Sarah
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Declerck, Thierry
%Y Doğan, Mehmet Uğur
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Moreno, Asuncion
%Y Odijk, Jan
%Y Piperidis, Stelios
%S Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC’12)
%D 2012
%8 May
%I European Language Resources Association (ELRA)
%C Istanbul, Turkey
%F babko-malaya-etal-2012-identifying
%X This paper describes an approach to automatic nuggetization and an implemented system employed in GALE Distillation evaluation to measure the information content of text returned in response to an open-ended question. The system identifies nuggets, or atomic units of information, categorizes them according to their semantic type, and selects different types of nuggets depending on the type of the question. We further show how this approach addresses the main challenges for using automatic nuggetization for QA evaluation: the variability of relevant nuggets and their dependence on the question. Specifically, we propose a template-based approach to nuggetization, where different semantic categories of nuggets are extracted depending on the template of a question. During evaluation, human annotators judge each snippet returned in response to a query as relevant or irrelevant, whereas automatic template-based nuggetization is further used to identify the semantic units of information that people would have selected as ‘relevant’ or ‘irrelevant’ nuggets for a given query. Finally, the paper presents the performance results of the nuggetization system, which compare the number of automatically generated nuggets and human nuggets and show that our automatic nuggetization is consistent with human judgments.
%U http://www.lrec-conf.org/proceedings/lrec2012/pdf/482_Paper.pdf
%P 2322-2327
Markdown (Informal)
[Identifying Nuggets of Information in GALE Distillation Evaluation](http://www.lrec-conf.org/proceedings/lrec2012/pdf/482_Paper.pdf) (Babko-Malaya et al., LREC 2012)
ACL
Olga Babko-Malaya, Greg Milette, Michael Schneider, and Sarah Scogin. 2012. Identifying Nuggets of Information in GALE Distillation Evaluation. In Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC’12), pages 2322–2327, Istanbul, Turkey. European Language Resources Association (ELRA).