@inproceedings{hayashibe-2020-japanese,
title = "{J}apanese Realistic Textual Entailment Corpus",
author = "Hayashibe, Yuta",
editor = "Calzolari, Nicoletta and
B{\'e}chet, Fr{\'e}d{\'e}ric and
Blache, Philippe and
Choukri, Khalid and
Cieri, Christopher and
Declerck, Thierry and
Goggi, Sara and
Isahara, Hitoshi and
Maegaard, Bente and
Mariani, Joseph and
Mazo, H{\'e}l{\`e}ne and
Moreno, Asuncion and
Odijk, Jan and
Piperidis, Stelios",
booktitle = "Proceedings of the Twelfth Language Resources and Evaluation Conference",
month = may,
year = "2020",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://aclanthology.org/2020.lrec-1.843",
pages = "6827--6834",
abstract = "We perform the textual entailment (TE) corpus construction for the Japanese Language with the following three characteristics: First, the corpus consists of realistic sentences; that is, all sentences are spontaneous or almost equivalent. It does not need manual writing which causes hidden biases. Second, the corpus contains adversarial examples. We collect challenging examples that can not be solved by a recent pre-trained language model. Third, the corpus contains explanations for a part of non-entailment labels. We perform the reasoning annotation where annotators are asked to check which tokens in hypotheses are the reason why the relations are labeled. It makes easy to validate the annotation and analyze system errors. The resulting corpus consists of 48,000 realistic Japanese examples. It is the largest among publicly available Japanese TE corpora. Additionally, it is the first Japanese TE corpus that includes reasons for the annotation as we know. We are planning to distribute this corpus to the NLP community at the time of publication.",
language = "English",
ISBN = "979-10-95546-34-4",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="hayashibe-2020-japanese">
<titleInfo>
<title>Japanese Realistic Textual Entailment Corpus</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yuta</namePart>
<namePart type="family">Hayashibe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<language>
<languageTerm type="text">English</languageTerm>
<languageTerm type="code" authority="iso639-2b">eng</languageTerm>
</language>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Twelfth Language Resources and Evaluation Conference</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Frédéric</namePart>
<namePart type="family">Béchet</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Philippe</namePart>
<namePart type="family">Blache</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christopher</namePart>
<namePart type="family">Cieri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thierry</namePart>
<namePart type="family">Declerck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Goggi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hitoshi</namePart>
<namePart type="family">Isahara</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hélène</namePart>
<namePart type="family">Mazo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asuncion</namePart>
<namePart type="family">Moreno</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-10-95546-34-4</identifier>
</relatedItem>
<abstract>We perform the textual entailment (TE) corpus construction for the Japanese Language with the following three characteristics: First, the corpus consists of realistic sentences; that is, all sentences are spontaneous or almost equivalent. It does not need manual writing which causes hidden biases. Second, the corpus contains adversarial examples. We collect challenging examples that can not be solved by a recent pre-trained language model. Third, the corpus contains explanations for a part of non-entailment labels. We perform the reasoning annotation where annotators are asked to check which tokens in hypotheses are the reason why the relations are labeled. It makes easy to validate the annotation and analyze system errors. The resulting corpus consists of 48,000 realistic Japanese examples. It is the largest among publicly available Japanese TE corpora. Additionally, it is the first Japanese TE corpus that includes reasons for the annotation as we know. We are planning to distribute this corpus to the NLP community at the time of publication.</abstract>
<identifier type="citekey">hayashibe-2020-japanese</identifier>
<location>
<url>https://aclanthology.org/2020.lrec-1.843</url>
</location>
<part>
<date>2020-05</date>
<extent unit="page">
<start>6827</start>
<end>6834</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Japanese Realistic Textual Entailment Corpus
%A Hayashibe, Yuta
%Y Calzolari, Nicoletta
%Y Béchet, Frédéric
%Y Blache, Philippe
%Y Choukri, Khalid
%Y Cieri, Christopher
%Y Declerck, Thierry
%Y Goggi, Sara
%Y Isahara, Hitoshi
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Mazo, Hélène
%Y Moreno, Asuncion
%Y Odijk, Jan
%Y Piperidis, Stelios
%S Proceedings of the Twelfth Language Resources and Evaluation Conference
%D 2020
%8 May
%I European Language Resources Association
%C Marseille, France
%@ 979-10-95546-34-4
%G English
%F hayashibe-2020-japanese
%X We perform the textual entailment (TE) corpus construction for the Japanese Language with the following three characteristics: First, the corpus consists of realistic sentences; that is, all sentences are spontaneous or almost equivalent. It does not need manual writing which causes hidden biases. Second, the corpus contains adversarial examples. We collect challenging examples that can not be solved by a recent pre-trained language model. Third, the corpus contains explanations for a part of non-entailment labels. We perform the reasoning annotation where annotators are asked to check which tokens in hypotheses are the reason why the relations are labeled. It makes easy to validate the annotation and analyze system errors. The resulting corpus consists of 48,000 realistic Japanese examples. It is the largest among publicly available Japanese TE corpora. Additionally, it is the first Japanese TE corpus that includes reasons for the annotation as we know. We are planning to distribute this corpus to the NLP community at the time of publication.
%U https://aclanthology.org/2020.lrec-1.843
%P 6827-6834
Markdown (Informal)
[Japanese Realistic Textual Entailment Corpus](https://aclanthology.org/2020.lrec-1.843) (Hayashibe, LREC 2020)
ACL
- Yuta Hayashibe. 2020. Japanese Realistic Textual Entailment Corpus. In Proceedings of the Twelfth Language Resources and Evaluation Conference, pages 6827–6834, Marseille, France. European Language Resources Association.