@inproceedings{sukkarieh-bolge-2010-building,
title = "Building a Textual Entailment Suite for the Evaluation of Automatic Content Scoring Technologies",
author = "Sukkarieh, Jana Z. and
Bolge, Eleanor",
editor = "Calzolari, Nicoletta and
Choukri, Khalid and
Maegaard, Bente and
Mariani, Joseph and
Odijk, Jan and
Piperidis, Stelios and
Rosner, Mike and
Tapias, Daniel",
booktitle = "Proceedings of the Seventh International Conference on Language Resources and Evaluation ({LREC}`10)",
month = may,
year = "2010",
address = "Valletta, Malta",
publisher = "European Language Resources Association (ELRA)",
url = "https://aclanthology.org/L10-1213/",
abstract = "Automatic content scoring for free-text responses has started to emerge as an application of Natural Language Processing in its own right, much like question answering or machine translation. The task, in general, is reduced to comparing a students answer to a model answer. Although a considerable amount of work has been done, common benchmarks and evaluation measures for this application do not currently exist. It is yet impossible to perform a comparative evaluation or progress tracking of this application across systems {\textemdash} an application that we view as a textual entailment task. This paper concentrates on introducing an Educational Testing Service-built test suite that makes a step towards establishing such a benchmark. The suite can be used as regression and performance evaluations both intra-c-rater{\^A}{\textregistered} or inter automatic content scoring technologies. It is important to note that existing textual entailment test suites like PASCAL RTE or FraCas, though beneficial, are not suitable for our purposes since we deal with atypical naturally-occurring student responses that need to be categorized in order to serve as regression test cases."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="sukkarieh-bolge-2010-building">
<titleInfo>
<title>Building a Textual Entailment Suite for the Evaluation of Automatic Content Scoring Technologies</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jana</namePart>
<namePart type="given">Z</namePart>
<namePart type="family">Sukkarieh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eleanor</namePart>
<namePart type="family">Bolge</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2010-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Seventh International Conference on Language Resources and Evaluation (LREC‘10)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mike</namePart>
<namePart type="family">Rosner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Tapias</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Valletta, Malta</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Automatic content scoring for free-text responses has started to emerge as an application of Natural Language Processing in its own right, much like question answering or machine translation. The task, in general, is reduced to comparing a students answer to a model answer. Although a considerable amount of work has been done, common benchmarks and evaluation measures for this application do not currently exist. It is yet impossible to perform a comparative evaluation or progress tracking of this application across systems — an application that we view as a textual entailment task. This paper concentrates on introducing an Educational Testing Service-built test suite that makes a step towards establishing such a benchmark. The suite can be used as regression and performance evaluations both intra-c-rater¯ or inter automatic content scoring technologies. It is important to note that existing textual entailment test suites like PASCAL RTE or FraCas, though beneficial, are not suitable for our purposes since we deal with atypical naturally-occurring student responses that need to be categorized in order to serve as regression test cases.</abstract>
<identifier type="citekey">sukkarieh-bolge-2010-building</identifier>
<location>
<url>https://aclanthology.org/L10-1213/</url>
</location>
<part>
<date>2010-05</date>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Building a Textual Entailment Suite for the Evaluation of Automatic Content Scoring Technologies
%A Sukkarieh, Jana Z.
%A Bolge, Eleanor
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Odijk, Jan
%Y Piperidis, Stelios
%Y Rosner, Mike
%Y Tapias, Daniel
%S Proceedings of the Seventh International Conference on Language Resources and Evaluation (LREC‘10)
%D 2010
%8 May
%I European Language Resources Association (ELRA)
%C Valletta, Malta
%F sukkarieh-bolge-2010-building
%X Automatic content scoring for free-text responses has started to emerge as an application of Natural Language Processing in its own right, much like question answering or machine translation. The task, in general, is reduced to comparing a students answer to a model answer. Although a considerable amount of work has been done, common benchmarks and evaluation measures for this application do not currently exist. It is yet impossible to perform a comparative evaluation or progress tracking of this application across systems — an application that we view as a textual entailment task. This paper concentrates on introducing an Educational Testing Service-built test suite that makes a step towards establishing such a benchmark. The suite can be used as regression and performance evaluations both intra-c-rater¯ or inter automatic content scoring technologies. It is important to note that existing textual entailment test suites like PASCAL RTE or FraCas, though beneficial, are not suitable for our purposes since we deal with atypical naturally-occurring student responses that need to be categorized in order to serve as regression test cases.
%U https://aclanthology.org/L10-1213/
Markdown (Informal)
[Building a Textual Entailment Suite for the Evaluation of Automatic Content Scoring Technologies](https://aclanthology.org/L10-1213/) (Sukkarieh & Bolge, LREC 2010)
ACL