@inproceedings{li-etal-2022-unregulated,
title = "Unregulated {C}hinese-to-{E}nglish Data Expansion Does {NOT} Work for Neural Event Detection",
author = "Li, Zhongqiu and
Hong, Yu and
Wang, Jie and
He, Shiming and
Yao, Jianmin and
Zhou, Guodong",
booktitle = "Proceedings of the 29th International Conference on Computational Linguistics",
month = oct,
year = "2022",
address = "Gyeongju, Republic of Korea",
publisher = "International Committee on Computational Linguistics",
url = "https://aclanthology.org/2022.coling-1.232",
pages = "2633--2638",
abstract = "We leverage cross-language data expansion and retraining to enhance neural Event Detection (abbr., ED) on English ACE corpus. Machine translation is utilized for expanding English training set of ED from that of Chinese. However, experimental results illustrate that such strategy actually results in performance degradation. The survey of translations suggests that the mistakenly-aligned triggers in the expanded data negatively influences the retraining process. We refer this phenomenon to {``}trigger falsification{''}. To overcome the issue, we apply heuristic rules for regulating the expanded data, fixing the distracting samples that contain the falsified triggers. The supplementary experiments show that the rule-based regulation is beneficial, yielding the improvement of about 1.6{\%} F1-score for ED. We additionally prove that, instead of transfer learning from the translated ED data, the straight data combination by random pouring surprisingly performs better.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="li-etal-2022-unregulated">
<titleInfo>
<title>Unregulated Chinese-to-English Data Expansion Does NOT Work for Neural Event Detection</title>
</titleInfo>
<name type="personal">
<namePart type="given">Zhongqiu</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yu</namePart>
<namePart type="family">Hong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jie</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shiming</namePart>
<namePart type="family">He</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jianmin</namePart>
<namePart type="family">Yao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Guodong</namePart>
<namePart type="family">Zhou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-10</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 29th International Conference on Computational Linguistics</title>
</titleInfo>
<originInfo>
<publisher>International Committee on Computational Linguistics</publisher>
<place>
<placeTerm type="text">Gyeongju, Republic of Korea</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We leverage cross-language data expansion and retraining to enhance neural Event Detection (abbr., ED) on English ACE corpus. Machine translation is utilized for expanding English training set of ED from that of Chinese. However, experimental results illustrate that such strategy actually results in performance degradation. The survey of translations suggests that the mistakenly-aligned triggers in the expanded data negatively influences the retraining process. We refer this phenomenon to “trigger falsification”. To overcome the issue, we apply heuristic rules for regulating the expanded data, fixing the distracting samples that contain the falsified triggers. The supplementary experiments show that the rule-based regulation is beneficial, yielding the improvement of about 1.6% F1-score for ED. We additionally prove that, instead of transfer learning from the translated ED data, the straight data combination by random pouring surprisingly performs better.</abstract>
<identifier type="citekey">li-etal-2022-unregulated</identifier>
<location>
<url>https://aclanthology.org/2022.coling-1.232</url>
</location>
<part>
<date>2022-10</date>
<extent unit="page">
<start>2633</start>
<end>2638</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Unregulated Chinese-to-English Data Expansion Does NOT Work for Neural Event Detection
%A Li, Zhongqiu
%A Hong, Yu
%A Wang, Jie
%A He, Shiming
%A Yao, Jianmin
%A Zhou, Guodong
%S Proceedings of the 29th International Conference on Computational Linguistics
%D 2022
%8 October
%I International Committee on Computational Linguistics
%C Gyeongju, Republic of Korea
%F li-etal-2022-unregulated
%X We leverage cross-language data expansion and retraining to enhance neural Event Detection (abbr., ED) on English ACE corpus. Machine translation is utilized for expanding English training set of ED from that of Chinese. However, experimental results illustrate that such strategy actually results in performance degradation. The survey of translations suggests that the mistakenly-aligned triggers in the expanded data negatively influences the retraining process. We refer this phenomenon to “trigger falsification”. To overcome the issue, we apply heuristic rules for regulating the expanded data, fixing the distracting samples that contain the falsified triggers. The supplementary experiments show that the rule-based regulation is beneficial, yielding the improvement of about 1.6% F1-score for ED. We additionally prove that, instead of transfer learning from the translated ED data, the straight data combination by random pouring surprisingly performs better.
%U https://aclanthology.org/2022.coling-1.232
%P 2633-2638
Markdown (Informal)
[Unregulated Chinese-to-English Data Expansion Does NOT Work for Neural Event Detection](https://aclanthology.org/2022.coling-1.232) (Li et al., COLING 2022)
ACL