@inproceedings{wan-etal-2020-improving,
title = "Improving Grammatical Error Correction with Data Augmentation by Editing Latent Representation",
author = "Wan, Zhaohong and
Wan, Xiaojun and
Wang, Wenguang",
editor = "Scott, Donia and
Bel, Nuria and
Zong, Chengqing",
booktitle = "Proceedings of the 28th International Conference on Computational Linguistics",
month = dec,
year = "2020",
address = "Barcelona, Spain (Online)",
publisher = "International Committee on Computational Linguistics",
url = "https://aclanthology.org/2020.coling-main.200",
doi = "10.18653/v1/2020.coling-main.200",
pages = "2202--2212",
abstract = "The incorporation of data augmentation method in grammatical error correction task has attracted much attention. However, existing data augmentation methods mainly apply noise to tokens, which leads to the lack of diversity of generated errors. In view of this, we propose a new data augmentation method that can apply noise to the latent representation of a sentence. By editing the latent representations of grammatical sentences, we can generate synthetic samples with various error types. Combining with some pre-defined rules, our method can greatly improve the performance and robustness of existing grammatical error correction models. We evaluate our method on public benchmarks of GEC task and it achieves the state-of-the-art performance on CoNLL-2014 and FCE benchmarks.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="wan-etal-2020-improving">
<titleInfo>
<title>Improving Grammatical Error Correction with Data Augmentation by Editing Latent Representation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Zhaohong</namePart>
<namePart type="family">Wan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiaojun</namePart>
<namePart type="family">Wan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wenguang</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 28th International Conference on Computational Linguistics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Donia</namePart>
<namePart type="family">Scott</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nuria</namePart>
<namePart type="family">Bel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chengqing</namePart>
<namePart type="family">Zong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>International Committee on Computational Linguistics</publisher>
<place>
<placeTerm type="text">Barcelona, Spain (Online)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The incorporation of data augmentation method in grammatical error correction task has attracted much attention. However, existing data augmentation methods mainly apply noise to tokens, which leads to the lack of diversity of generated errors. In view of this, we propose a new data augmentation method that can apply noise to the latent representation of a sentence. By editing the latent representations of grammatical sentences, we can generate synthetic samples with various error types. Combining with some pre-defined rules, our method can greatly improve the performance and robustness of existing grammatical error correction models. We evaluate our method on public benchmarks of GEC task and it achieves the state-of-the-art performance on CoNLL-2014 and FCE benchmarks.</abstract>
<identifier type="citekey">wan-etal-2020-improving</identifier>
<identifier type="doi">10.18653/v1/2020.coling-main.200</identifier>
<location>
<url>https://aclanthology.org/2020.coling-main.200</url>
</location>
<part>
<date>2020-12</date>
<extent unit="page">
<start>2202</start>
<end>2212</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Improving Grammatical Error Correction with Data Augmentation by Editing Latent Representation
%A Wan, Zhaohong
%A Wan, Xiaojun
%A Wang, Wenguang
%Y Scott, Donia
%Y Bel, Nuria
%Y Zong, Chengqing
%S Proceedings of the 28th International Conference on Computational Linguistics
%D 2020
%8 December
%I International Committee on Computational Linguistics
%C Barcelona, Spain (Online)
%F wan-etal-2020-improving
%X The incorporation of data augmentation method in grammatical error correction task has attracted much attention. However, existing data augmentation methods mainly apply noise to tokens, which leads to the lack of diversity of generated errors. In view of this, we propose a new data augmentation method that can apply noise to the latent representation of a sentence. By editing the latent representations of grammatical sentences, we can generate synthetic samples with various error types. Combining with some pre-defined rules, our method can greatly improve the performance and robustness of existing grammatical error correction models. We evaluate our method on public benchmarks of GEC task and it achieves the state-of-the-art performance on CoNLL-2014 and FCE benchmarks.
%R 10.18653/v1/2020.coling-main.200
%U https://aclanthology.org/2020.coling-main.200
%U https://doi.org/10.18653/v1/2020.coling-main.200
%P 2202-2212
Markdown (Informal)
[Improving Grammatical Error Correction with Data Augmentation by Editing Latent Representation](https://aclanthology.org/2020.coling-main.200) (Wan et al., COLING 2020)
ACL