@inproceedings{tsuchiya-yokoi-2022-developing,
title = "Developing a Dataset of Overridden Information in {W}ikipedia",
author = "Tsuchiya, Masatoshi and
Yokoi, Yasutaka",
editor = "Calzolari, Nicoletta and
B{\'e}chet, Fr{\'e}d{\'e}ric and
Blache, Philippe and
Choukri, Khalid and
Cieri, Christopher and
Declerck, Thierry and
Goggi, Sara and
Isahara, Hitoshi and
Maegaard, Bente and
Mariani, Joseph and
Mazo, H{\'e}l{\`e}ne and
Odijk, Jan and
Piperidis, Stelios",
booktitle = "Proceedings of the Thirteenth Language Resources and Evaluation Conference",
month = jun,
year = "2022",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://aclanthology.org/2022.lrec-1.601",
pages = "5601--5608",
abstract = "This paper proposes a new task of detecting information override. Since all information on the Web is not updated in a timely manner, the necessity is created for information that is overridden by another information source to be discarded. The task is formalized as a binary classification problem to determine whether a reference sentence has overridden a target sentence. In investigating this task, this paper describes a construction procedure for the dataset of overridden information by collecting sentence pairs from the difference between two versions of Wikipedia. Our developing dataset shows that the old version of Wikipedia contains much overridden information and that the detection of information override is necessary.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="tsuchiya-yokoi-2022-developing">
<titleInfo>
<title>Developing a Dataset of Overridden Information in Wikipedia</title>
</titleInfo>
<name type="personal">
<namePart type="given">Masatoshi</namePart>
<namePart type="family">Tsuchiya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yasutaka</namePart>
<namePart type="family">Yokoi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Thirteenth Language Resources and Evaluation Conference</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Frédéric</namePart>
<namePart type="family">Béchet</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Philippe</namePart>
<namePart type="family">Blache</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christopher</namePart>
<namePart type="family">Cieri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thierry</namePart>
<namePart type="family">Declerck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Goggi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hitoshi</namePart>
<namePart type="family">Isahara</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hélène</namePart>
<namePart type="family">Mazo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper proposes a new task of detecting information override. Since all information on the Web is not updated in a timely manner, the necessity is created for information that is overridden by another information source to be discarded. The task is formalized as a binary classification problem to determine whether a reference sentence has overridden a target sentence. In investigating this task, this paper describes a construction procedure for the dataset of overridden information by collecting sentence pairs from the difference between two versions of Wikipedia. Our developing dataset shows that the old version of Wikipedia contains much overridden information and that the detection of information override is necessary.</abstract>
<identifier type="citekey">tsuchiya-yokoi-2022-developing</identifier>
<location>
<url>https://aclanthology.org/2022.lrec-1.601</url>
</location>
<part>
<date>2022-06</date>
<extent unit="page">
<start>5601</start>
<end>5608</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Developing a Dataset of Overridden Information in Wikipedia
%A Tsuchiya, Masatoshi
%A Yokoi, Yasutaka
%Y Calzolari, Nicoletta
%Y Béchet, Frédéric
%Y Blache, Philippe
%Y Choukri, Khalid
%Y Cieri, Christopher
%Y Declerck, Thierry
%Y Goggi, Sara
%Y Isahara, Hitoshi
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Mazo, Hélène
%Y Odijk, Jan
%Y Piperidis, Stelios
%S Proceedings of the Thirteenth Language Resources and Evaluation Conference
%D 2022
%8 June
%I European Language Resources Association
%C Marseille, France
%F tsuchiya-yokoi-2022-developing
%X This paper proposes a new task of detecting information override. Since all information on the Web is not updated in a timely manner, the necessity is created for information that is overridden by another information source to be discarded. The task is formalized as a binary classification problem to determine whether a reference sentence has overridden a target sentence. In investigating this task, this paper describes a construction procedure for the dataset of overridden information by collecting sentence pairs from the difference between two versions of Wikipedia. Our developing dataset shows that the old version of Wikipedia contains much overridden information and that the detection of information override is necessary.
%U https://aclanthology.org/2022.lrec-1.601
%P 5601-5608
Markdown (Informal)
[Developing a Dataset of Overridden Information in Wikipedia](https://aclanthology.org/2022.lrec-1.601) (Tsuchiya & Yokoi, LREC 2022)
ACL