% Conference-paper entry. String values use brace delimiters; month uses the predefined oct macro.
@inproceedings{liu-etal-2022-ceta,
  title     = {{CETA}: A Consensus Enhanced Training Approach for Denoising in Distantly Supervised Relation Extraction},
  author    = {Liu, Ruri and
               Mo, Shasha and
               Niu, Jianwei and
               Fan, Shengda},
  editor    = {Calzolari, Nicoletta and
               Huang, Chu-Ren and
               Kim, Hansaem and
               Pustejovsky, James and
               Wanner, Leo and
               Choi, Key-Sun and
               Ryu, Pum-Mo and
               Chen, Hsin-Hsi and
               Donatelli, Lucia and
               Ji, Heng and
               Kurohashi, Sadao and
               Paggio, Patrizia and
               Xue, Nianwen and
               Kim, Seokhwan and
               Hahm, Younggyun and
               He, Zhong and
               Lee, Tony Kyungil and
               Santus, Enrico and
               Bond, Francis and
               Na, Seung-Hoon},
  booktitle = {Proceedings of the 29th International Conference on Computational Linguistics},
  month     = oct,
  year      = {2022},
  address   = {Gyeongju, Republic of Korea},
  publisher = {International Committee on Computational Linguistics},
  url       = {https://aclanthology.org/2022.coling-1.197},
  pages     = {2247--2258},
  abstract  = {Distantly supervised relation extraction aims to extract relational facts from texts but suffers from noisy instances. Existing methods usually select reliable sentences that rely on potential noisy labels, resulting in wrongly selecting many noisy training instances or underutilizing a large amount of valuable training data. This paper proposes a sentence-level DSRE method beyond typical instance selection approaches by preventing samples from falling into the wrong classification space on the feature space. Specifically, a theorem for denoising and the corresponding implementation, named Consensus Enhanced Training Approach (CETA), are proposed in this paper. By training the model with CETA, samples of different classes are separated, and samples of the same class are closely clustered in the feature space. Thus the model can easily establish the robust classification boundary to prevent noisy labels from biasing wrongly labeled samples into the wrong classification space. This process is achieved by enhancing the classification consensus between two discrepant classifiers and does not depend on any potential noisy labels, thus avoiding the above two limitations. Extensive experiments on widely-used benchmarks have demonstrated that CETA significantly outperforms the previous methods and achieves new state-of-the-art results.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for citekey liu-etal-2022-ceta: four authors, one host proceedings item with twenty editors. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="liu-etal-2022-ceta">
    <titleInfo>
      <title>CETA: A Consensus Enhanced Training Approach for Denoising in Distantly Supervised Relation Extraction</title>
    </titleInfo>
    <!-- Authors of the paper, in listed order. -->
    <name type="personal">
      <namePart type="given">Ruri</namePart>
      <namePart type="family">Liu</namePart>
      <role><roleTerm authority="marcrelator" type="text">author</roleTerm></role>
    </name>
    <name type="personal">
      <namePart type="given">Shasha</namePart>
      <namePart type="family">Mo</namePart>
      <role><roleTerm authority="marcrelator" type="text">author</roleTerm></role>
    </name>
    <name type="personal">
      <namePart type="given">Jianwei</namePart>
      <namePart type="family">Niu</namePart>
      <role><roleTerm authority="marcrelator" type="text">author</roleTerm></role>
    </name>
    <name type="personal">
      <namePart type="given">Shengda</namePart>
      <namePart type="family">Fan</namePart>
      <role><roleTerm authority="marcrelator" type="text">author</roleTerm></role>
    </name>
    <originInfo>
      <dateIssued>2022-10</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, its editors, publisher and place. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 29th International Conference on Computational Linguistics</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Nicoletta</namePart>
        <namePart type="family">Calzolari</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">Chu-Ren</namePart>
        <namePart type="family">Huang</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">Hansaem</namePart>
        <namePart type="family">Kim</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">James</namePart>
        <namePart type="family">Pustejovsky</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">Leo</namePart>
        <namePart type="family">Wanner</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">Key-Sun</namePart>
        <namePart type="family">Choi</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">Pum-Mo</namePart>
        <namePart type="family">Ryu</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">Hsin-Hsi</namePart>
        <namePart type="family">Chen</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">Lucia</namePart>
        <namePart type="family">Donatelli</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">Heng</namePart>
        <namePart type="family">Ji</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">Sadao</namePart>
        <namePart type="family">Kurohashi</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">Patrizia</namePart>
        <namePart type="family">Paggio</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">Nianwen</namePart>
        <namePart type="family">Xue</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">Seokhwan</namePart>
        <namePart type="family">Kim</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">Younggyun</namePart>
        <namePart type="family">Hahm</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">Zhong</namePart>
        <namePart type="family">He</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">Tony</namePart>
        <namePart type="given">Kyungil</namePart>
        <namePart type="family">Lee</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">Enrico</namePart>
        <namePart type="family">Santus</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">Francis</namePart>
        <namePart type="family">Bond</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">Seung-Hoon</namePart>
        <namePart type="family">Na</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <originInfo>
        <publisher>International Committee on Computational Linguistics</publisher>
        <place><placeTerm type="text">Gyeongju, Republic of Korea</placeTerm></place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Distantly supervised relation extraction aims to extract relational facts from texts but suffers from noisy instances. Existing methods usually select reliable sentences that rely on potential noisy labels, resulting in wrongly selecting many noisy training instances or underutilizing a large amount of valuable training data. This paper proposes a sentence-level DSRE method beyond typical instance selection approaches by preventing samples from falling into the wrong classification space on the feature space. Specifically, a theorem for denoising and the corresponding implementation, named Consensus Enhanced Training Approach (CETA), are proposed in this paper. By training the model with CETA, samples of different classes are separated, and samples of the same class are closely clustered in the feature space. Thus the model can easily establish the robust classification boundary to prevent noisy labels from biasing wrongly labeled samples into the wrong classification space. This process is achieved by enhancing the classification consensus between two discrepant classifiers and does not depend on any potential noisy labels, thus avoiding the above two limitations. Extensive experiments on widely-used benchmarks have demonstrated that CETA significantly outperforms the previous methods and achieves new state-of-the-art results.</abstract>
    <identifier type="citekey">liu-etal-2022-ceta</identifier>
    <location><url>https://aclanthology.org/2022.coling-1.197</url></location>
    <!-- Position of the paper within the host volume. -->
    <part>
      <date>2022-10</date>
      <extent unit="page">
        <start>2247</start>
        <end>2258</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T CETA: A Consensus Enhanced Training Approach for Denoising in Distantly Supervised Relation Extraction
%A Liu, Ruri
%A Mo, Shasha
%A Niu, Jianwei
%A Fan, Shengda
%Y Calzolari, Nicoletta
%Y Huang, Chu-Ren
%Y Kim, Hansaem
%Y Pustejovsky, James
%Y Wanner, Leo
%Y Choi, Key-Sun
%Y Ryu, Pum-Mo
%Y Chen, Hsin-Hsi
%Y Donatelli, Lucia
%Y Ji, Heng
%Y Kurohashi, Sadao
%Y Paggio, Patrizia
%Y Xue, Nianwen
%Y Kim, Seokhwan
%Y Hahm, Younggyun
%Y He, Zhong
%Y Lee, Tony Kyungil
%Y Santus, Enrico
%Y Bond, Francis
%Y Na, Seung-Hoon
%S Proceedings of the 29th International Conference on Computational Linguistics
%D 2022
%8 October
%I International Committee on Computational Linguistics
%C Gyeongju, Republic of Korea
%F liu-etal-2022-ceta
%X Distantly supervised relation extraction aims to extract relational facts from texts but suffers from noisy instances. Existing methods usually select reliable sentences that rely on potential noisy labels, resulting in wrongly selecting many noisy training instances or underutilizing a large amount of valuable training data. This paper proposes a sentence-level DSRE method beyond typical instance selection approaches by preventing samples from falling into the wrong classification space on the feature space. Specifically, a theorem for denoising and the corresponding implementation, named Consensus Enhanced Training Approach (CETA), are proposed in this paper. By training the model with CETA, samples of different classes are separated, and samples of the same class are closely clustered in the feature space. Thus the model can easily establish the robust classification boundary to prevent noisy labels from biasing wrongly labeled samples into the wrong classification space. This process is achieved by enhancing the classification consensus between two discrepant classifiers and does not depend on any potential noisy labels, thus avoiding the above two limitations. Extensive experiments on widely-used benchmarks have demonstrated that CETA significantly outperforms the previous methods and achieves new state-of-the-art results.
%U https://aclanthology.org/2022.coling-1.197
%P 2247-2258
Markdown (Informal)
[CETA: A Consensus Enhanced Training Approach for Denoising in Distantly Supervised Relation Extraction](https://aclanthology.org/2022.coling-1.197) (Liu et al., COLING 2022)
ACL