% ACL Anthology record 2024.lrec-main.1445: conference paper in the LREC-COLING 2024 proceedings.
@inproceedings{liu-etal-2024-towards,
  title     = {Towards Robust Temporal Activity Localization Learning with Noisy Labels},
  author    = {Liu, Daizong and
               Qu, Xiaoye and
               Fang, Xiang and
               Dong, Jianfeng and
               Zhou, Pan and
               Nan, Guoshun and
               Tang, Keke and
               Fang, Wanlong and
               Cheng, Yu},
  editor    = {Calzolari, Nicoletta and
               Kan, Min-Yen and
               Hoste, Veronique and
               Lenci, Alessandro and
               Sakti, Sakriani and
               Xue, Nianwen},
  booktitle = {Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)},
  month     = may,
  year      = {2024},
  address   = {Torino, Italia},
  publisher = {ELRA and ICCL},
  url       = {https://aclanthology.org/2024.lrec-main.1445},
  pages     = {16630--16642},
  abstract  = {This paper addresses the task of temporal activity localization (TAL). Although recent works have made significant progress in TAL research, almost all of them implicitly assume that the dense frame-level correspondences in each video-query pair are correctly annotated. However, in reality, such an assumption is extremely expensive and even impossible to satisfy due to subjective labeling. To alleviate this issue, in this paper, we explore a new TAL setting termed Noisy Temporal activity localization (NTAL), where a TAL model should be robust to the mixed training data with noisy moment boundaries. Inspired by the memorization effect of neural networks, we propose a novel method called Co-Teaching Regularizer (CTR) for NTAL. Specifically, we first learn a Gaussian Mixture Model to divide the mixed training data into preliminary clean and noisy subsets. Subsequently, we refine the labels of the two subsets by an adaptive prediction function so that their true positive and false positive samples could be identified. To avoid single model being prone to its mistakes learned by the mixed data, we adopt a co-teaching paradigm, which utilizes two models sharing the same framework to teach each other for robust learning. A curriculum strategy is further introduced to gradually learn the moment confidence from easy to hard. Experiments on three datasets demonstrate that our CTR is significantly more robust to the noisy training data compared to the existing methods.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 collection holding a single bibliographic record. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="liu-etal-2024-towards">
    <!-- Title of the paper itself. -->
    <titleInfo>
      <title>Towards Robust Temporal Activity Localization Learning with Noisy Labels</title>
    </titleInfo>
    <!-- Paper authors, one <name> element per person, in byline order. -->
    <name type="personal">
      <namePart type="given">Daizong</namePart>
      <namePart type="family">Liu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Xiaoye</namePart>
      <namePart type="family">Qu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Xiang</namePart>
      <namePart type="family">Fang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Jianfeng</namePart>
      <namePart type="family">Dong</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Pan</namePart>
      <namePart type="family">Zhou</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Guoshun</namePart>
      <namePart type="family">Nan</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Keke</namePart>
      <namePart type="family">Tang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Wanlong</namePart>
      <namePart type="family">Fang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Yu</namePart>
      <namePart type="family">Cheng</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2024-05</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors, publisher and place. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Nicoletta</namePart>
        <namePart type="family">Calzolari</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Min-Yen</namePart>
        <namePart type="family">Kan</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Veronique</namePart>
        <namePart type="family">Hoste</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Alessandro</namePart>
        <namePart type="family">Lenci</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Sakriani</namePart>
        <namePart type="family">Sakti</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Nianwen</namePart>
        <namePart type="family">Xue</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>ELRA and ICCL</publisher>
        <place>
          <placeTerm type="text">Torino, Italia</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>This paper addresses the task of temporal activity localization (TAL). Although recent works have made significant progress in TAL research, almost all of them implicitly assume that the dense frame-level correspondences in each video-query pair are correctly annotated. However, in reality, such an assumption is extremely expensive and even impossible to satisfy due to subjective labeling. To alleviate this issue, in this paper, we explore a new TAL setting termed Noisy Temporal activity localization (NTAL), where a TAL model should be robust to the mixed training data with noisy moment boundaries. Inspired by the memorization effect of neural networks, we propose a novel method called Co-Teaching Regularizer (CTR) for NTAL. Specifically, we first learn a Gaussian Mixture Model to divide the mixed training data into preliminary clean and noisy subsets. Subsequently, we refine the labels of the two subsets by an adaptive prediction function so that their true positive and false positive samples could be identified. To avoid single model being prone to its mistakes learned by the mixed data, we adopt a co-teaching paradigm, which utilizes two models sharing the same framework to teach each other for robust learning. A curriculum strategy is further introduced to gradually learn the moment confidence from easy to hard. Experiments on three datasets demonstrate that our CTR is significantly more robust to the noisy training data compared to the existing methods.</abstract>
    <!-- The citekey identifier repeats the value of the ID attribute on <mods>. -->
    <identifier type="citekey">liu-etal-2024-towards</identifier>
    <location>
      <url>https://aclanthology.org/2024.lrec-main.1445</url>
    </location>
    <!-- Date and page extent recorded for this paper. -->
    <part>
      <date>2024-05</date>
      <extent unit="page">
        <start>16630</start>
        <end>16642</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Towards Robust Temporal Activity Localization Learning with Noisy Labels
%A Liu, Daizong
%A Qu, Xiaoye
%A Fang, Xiang
%A Dong, Jianfeng
%A Zhou, Pan
%A Nan, Guoshun
%A Tang, Keke
%A Fang, Wanlong
%A Cheng, Yu
%Y Calzolari, Nicoletta
%Y Kan, Min-Yen
%Y Hoste, Veronique
%Y Lenci, Alessandro
%Y Sakti, Sakriani
%Y Xue, Nianwen
%S Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)
%D 2024
%8 May
%I ELRA and ICCL
%C Torino, Italia
%F liu-etal-2024-towards
%X This paper addresses the task of temporal activity localization (TAL). Although recent works have made significant progress in TAL research, almost all of them implicitly assume that the dense frame-level correspondences in each video-query pair are correctly annotated. However, in reality, such an assumption is extremely expensive and even impossible to satisfy due to subjective labeling. To alleviate this issue, in this paper, we explore a new TAL setting termed Noisy Temporal activity localization (NTAL), where a TAL model should be robust to the mixed training data with noisy moment boundaries. Inspired by the memorization effect of neural networks, we propose a novel method called Co-Teaching Regularizer (CTR) for NTAL. Specifically, we first learn a Gaussian Mixture Model to divide the mixed training data into preliminary clean and noisy subsets. Subsequently, we refine the labels of the two subsets by an adaptive prediction function so that their true positive and false positive samples could be identified. To avoid single model being prone to its mistakes learned by the mixed data, we adopt a co-teaching paradigm, which utilizes two models sharing the same framework to teach each other for robust learning. A curriculum strategy is further introduced to gradually learn the moment confidence from easy to hard. Experiments on three datasets demonstrate that our CTR is significantly more robust to the noisy training data compared to the existing methods.
%U https://aclanthology.org/2024.lrec-main.1445
%P 16630-16642
Markdown (Informal)
[Towards Robust Temporal Activity Localization Learning with Noisy Labels](https://aclanthology.org/2024.lrec-main.1445) (Liu et al., LREC-COLING 2024)
ACL
- Daizong Liu, Xiaoye Qu, Xiang Fang, Jianfeng Dong, Pan Zhou, Guoshun Nan, Keke Tang, Wanlong Fang, and Yu Cheng. 2024. Towards Robust Temporal Activity Localization Learning with Noisy Labels. In Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024), pages 16630–16642, Torino, Italia. ELRA and ICCL.