@inproceedings{rus-etal-2014-paraphrase,
title = "On Paraphrase Identification Corpora",
author = "Rus, Vasile and
Banjade, Rajendra and
Lintean, Mihai",
editor = "Calzolari, Nicoletta and
Choukri, Khalid and
Declerck, Thierry and
Loftsson, Hrafn and
Maegaard, Bente and
Mariani, Joseph and
Moreno, Asuncion and
Odijk, Jan and
Piperidis, Stelios",
booktitle = "Proceedings of the Ninth International Conference on Language Resources and Evaluation ({LREC}'14)",
month = may,
year = "2014",
address = "Reykjavik, Iceland",
publisher = "European Language Resources Association (ELRA)",
url = "http://www.lrec-conf.org/proceedings/lrec2014/pdf/1000_Paper.pdf",
pages = "2422--2429",
abstract = "We analyze in this paper a number of data sets proposed over the last decade or so for the task of paraphrase identification. The goal of the analysis is to identify the advantages as well as shortcomings of the previously proposed data sets. Based on the analysis, we then make recommendations about how to improve the process of creating and using such data sets for evaluating in the future approaches to the task of paraphrase identification or the more general task of semantic similarity. The recommendations are meant to improve our understanding of what a paraphrase is, offer a more fair ground for comparing approaches, increase the diversity of actual linguistic phenomena that future data sets will cover, and offer ways to improve our understanding of the contributions of various modules or approaches proposed for solving the task of paraphrase identification or similar tasks.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="rus-etal-2014-paraphrase">
<titleInfo>
<title>On Paraphrase Identification Corpora</title>
</titleInfo>
<name type="personal">
<namePart type="given">Vasile</namePart>
<namePart type="family">Rus</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rajendra</namePart>
<namePart type="family">Banjade</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mihai</namePart>
<namePart type="family">Lintean</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2014-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC’14)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thierry</namePart>
<namePart type="family">Declerck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hrafn</namePart>
<namePart type="family">Loftsson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asuncion</namePart>
<namePart type="family">Moreno</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Reykjavik, Iceland</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We analyze in this paper a number of data sets proposed over the last decade or so for the task of paraphrase identification. The goal of the analysis is to identify the advantages as well as shortcomings of the previously proposed data sets. Based on the analysis, we then make recommendations about how to improve the process of creating and using such data sets for evaluating in the future approaches to the task of paraphrase identification or the more general task of semantic similarity. The recommendations are meant to improve our understanding of what a paraphrase is, offer a more fair ground for comparing approaches, increase the diversity of actual linguistic phenomena that future data sets will cover, and offer ways to improve our understanding of the contributions of various modules or approaches proposed for solving the task of paraphrase identification or similar tasks.</abstract>
<identifier type="citekey">rus-etal-2014-paraphrase</identifier>
<location>
<url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/1000_Paper.pdf</url>
</location>
<part>
<date>2014-05</date>
<extent unit="page">
<start>2422</start>
<end>2429</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T On Paraphrase Identification Corpora
%A Rus, Vasile
%A Banjade, Rajendra
%A Lintean, Mihai
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Declerck, Thierry
%Y Loftsson, Hrafn
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Moreno, Asuncion
%Y Odijk, Jan
%Y Piperidis, Stelios
%S Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC’14)
%D 2014
%8 May
%I European Language Resources Association (ELRA)
%C Reykjavik, Iceland
%F rus-etal-2014-paraphrase
%X We analyze in this paper a number of data sets proposed over the last decade or so for the task of paraphrase identification. The goal of the analysis is to identify the advantages as well as shortcomings of the previously proposed data sets. Based on the analysis, we then make recommendations about how to improve the process of creating and using such data sets for evaluating in the future approaches to the task of paraphrase identification or the more general task of semantic similarity. The recommendations are meant to improve our understanding of what a paraphrase is, offer a more fair ground for comparing approaches, increase the diversity of actual linguistic phenomena that future data sets will cover, and offer ways to improve our understanding of the contributions of various modules or approaches proposed for solving the task of paraphrase identification or similar tasks.
%U http://www.lrec-conf.org/proceedings/lrec2014/pdf/1000_Paper.pdf
%P 2422-2429
Markdown (Informal)
[On Paraphrase Identification Corpora](http://www.lrec-conf.org/proceedings/lrec2014/pdf/1000_Paper.pdf) (Rus et al., LREC 2014)
ACL
- Vasile Rus, Rajendra Banjade, and Mihai Lintean. 2014. On Paraphrase Identification Corpora. In Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC'14), pages 2422–2429, Reykjavik, Iceland. European Language Resources Association (ELRA).