% ACL Anthology entry for a CoNLL 2021 conference paper (key: li-he-2021-data).
% Field values are brace-delimited; month uses the predefined BibTeX macro.
@inproceedings{li-he-2021-data,
    author    = {Li, Xia and He, Junyi},
    title     = {Data Augmentation of Incorporating Real Error Patterns and Linguistic Knowledge for Grammatical Error Correction},
    editor    = {Bisazza, Arianna and Abend, Omri},
    booktitle = {Proceedings of the 25th Conference on Computational Natural Language Learning},
    month     = nov,
    year      = {2021},
    address   = {Online},
    publisher = {Association for Computational Linguistics},
    pages     = {223--233},
    doi       = {10.18653/v1/2021.conll-1.17},
    url       = {https://aclanthology.org/2021.conll-1.17/},
    abstract  = {Data augmentation aims at expanding training data with clean text using noising schemes to improve the performance of grammatical error correction (GEC). In practice, there are a great number of real error patterns in the manually annotated training data. We argue that these real error patterns can be introduced into clean text to effectively generate more real and high quality synthetic data, which is not fully explored by previous studies. Moreover, we also find that linguistic knowledge can be incorporated into data augmentation for generating more representative and more diverse synthetic data. In this paper, we propose a novel data augmentation method that fully considers the real error patterns and the linguistic knowledge for the GEC task. We conduct extensive experiments on public data sets and the experimental results show that our method outperforms several strong baselines with far less external unlabeled clean text data, highlighting its extraordinary effectiveness in the GEC task that lacks large-scale labeled training data.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for the paper with citekey li-he-2021-data. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="li-he-2021-data">
    <!-- Paper title -->
    <titleInfo>
      <title>Data Augmentation of Incorporating Real Error Patterns and Linguistic Knowledge for Grammatical Error Correction</title>
    </titleInfo>
    <!-- Authors, in citation order -->
    <name type="personal">
      <namePart type="given">Xia</namePart>
      <namePart type="family">Li</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Junyi</namePart>
      <namePart type="family">He</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2021-11</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors and publisher -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 25th Conference on Computational Natural Language Learning</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Arianna</namePart>
        <namePart type="family">Bisazza</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Omri</namePart>
        <namePart type="family">Abend</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Online</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Data augmentation aims at expanding training data with clean text using noising schemes to improve the performance of grammatical error correction (GEC). In practice, there are a great number of real error patterns in the manually annotated training data. We argue that these real error patterns can be introduced into clean text to effectively generate more real and high quality synthetic data, which is not fully explored by previous studies. Moreover, we also find that linguistic knowledge can be incorporated into data augmentation for generating more representative and more diverse synthetic data. In this paper, we propose a novel data augmentation method that fully considers the real error patterns and the linguistic knowledge for the GEC task. We conduct extensive experiments on public data sets and the experimental results show that our method outperforms several strong baselines with far less external unlabeled clean text data, highlighting its extraordinary effectiveness in the GEC task that lacks large-scale labeled training data.</abstract>
    <!-- Identifiers and canonical location -->
    <identifier type="citekey">li-he-2021-data</identifier>
    <identifier type="doi">10.18653/v1/2021.conll-1.17</identifier>
    <location>
      <url>https://aclanthology.org/2021.conll-1.17/</url>
    </location>
    <!-- Date and page range within the host volume -->
    <part>
      <date>2021-11</date>
      <extent unit="page">
        <start>223</start>
        <end>233</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Data Augmentation of Incorporating Real Error Patterns and Linguistic Knowledge for Grammatical Error Correction
%A Li, Xia
%A He, Junyi
%Y Bisazza, Arianna
%Y Abend, Omri
%S Proceedings of the 25th Conference on Computational Natural Language Learning
%D 2021
%8 November
%I Association for Computational Linguistics
%C Online
%F li-he-2021-data
%X Data augmentation aims at expanding training data with clean text using noising schemes to improve the performance of grammatical error correction (GEC). In practice, there are a great number of real error patterns in the manually annotated training data. We argue that these real error patterns can be introduced into clean text to effectively generate more real and high quality synthetic data, which is not fully explored by previous studies. Moreover, we also find that linguistic knowledge can be incorporated into data augmentation for generating more representative and more diverse synthetic data. In this paper, we propose a novel data augmentation method that fully considers the real error patterns and the linguistic knowledge for the GEC task. We conduct extensive experiments on public data sets and the experimental results show that our method outperforms several strong baselines with far less external unlabeled clean text data, highlighting its extraordinary effectiveness in the GEC task that lacks large-scale labeled training data.
%R 10.18653/v1/2021.conll-1.17
%U https://aclanthology.org/2021.conll-1.17/
%U https://doi.org/10.18653/v1/2021.conll-1.17
%P 223-233
Markdown (Informal)
[Data Augmentation of Incorporating Real Error Patterns and Linguistic Knowledge for Grammatical Error Correction](https://aclanthology.org/2021.conll-1.17/) (Li & He, CoNLL 2021)
ACL