BibTeX
@inproceedings{takahashi-etal-2020-grammatical,
title = "Grammatical Error Correction Using Pseudo Learner Corpus Considering Learner{'}s Error Tendency",
author = "Takahashi, Yujin and
Katsumata, Satoru and
Komachi, Mamoru",
editor = "Rijhwani, Shruti and
Liu, Jiangming and
Wang, Yizhong and
Dror, Rotem",
booktitle = "Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics: Student Research Workshop",
month = jul,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.acl-srw.5",
doi = "10.18653/v1/2020.acl-srw.5",
pages = "27--32",
abstract = "Recently, several studies have focused on improving the performance of grammatical error correction (GEC) tasks using pseudo data. However, a large amount of pseudo data are required to train an accurate GEC model. To address the limitations of language and computational resources, we assume that introducing pseudo errors into sentences similar to those written by the language learners is more efficient, rather than incorporating random pseudo errors into monolingual data. In this regard, we study the effect of pseudo data on GEC task performance using two approaches. First, we extract sentences that are similar to the learners{'} sentences from monolingual data. Second, we generate realistic pseudo errors by considering error types that learners often make. Based on our comparative results, we observe that F0.5 scores for the Russian GEC task are significantly improved.",
}

MODS XML
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="takahashi-etal-2020-grammatical">
    <titleInfo>
      <title>Grammatical Error Correction Using Pseudo Learner Corpus Considering Learner’s Error Tendency</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Yujin</namePart>
      <namePart type="family">Takahashi</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Satoru</namePart>
      <namePart type="family">Katsumata</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Mamoru</namePart>
      <namePart type="family">Komachi</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2020-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics: Student Research Workshop</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Shruti</namePart>
        <namePart type="family">Rijhwani</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Jiangming</namePart>
        <namePart type="family">Liu</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Yizhong</namePart>
        <namePart type="family">Wang</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Rotem</namePart>
        <namePart type="family">Dror</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Online</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Recently, several studies have focused on improving the performance of grammatical error correction (GEC) tasks using pseudo data. However, a large amount of pseudo data are required to train an accurate GEC model. To address the limitations of language and computational resources, we assume that introducing pseudo errors into sentences similar to those written by the language learners is more efficient, rather than incorporating random pseudo errors into monolingual data. In this regard, we study the effect of pseudo data on GEC task performance using two approaches. First, we extract sentences that are similar to the learners’ sentences from monolingual data. Second, we generate realistic pseudo errors by considering error types that learners often make. Based on our comparative results, we observe that F0.5 scores for the Russian GEC task are significantly improved.</abstract>
    <identifier type="citekey">takahashi-etal-2020-grammatical</identifier>
    <identifier type="doi">10.18653/v1/2020.acl-srw.5</identifier>
    <location>
      <url>https://aclanthology.org/2020.acl-srw.5</url>
    </location>
    <part>
      <date>2020-07</date>
      <extent unit="page">
        <start>27</start>
        <end>32</end>
      </extent>
    </part>
  </mods>
</modsCollection>

Endnote
%0 Conference Proceedings
%T Grammatical Error Correction Using Pseudo Learner Corpus Considering Learner’s Error Tendency
%A Takahashi, Yujin
%A Katsumata, Satoru
%A Komachi, Mamoru
%Y Rijhwani, Shruti
%Y Liu, Jiangming
%Y Wang, Yizhong
%Y Dror, Rotem
%S Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics: Student Research Workshop
%D 2020
%8 July
%I Association for Computational Linguistics
%C Online
%F takahashi-etal-2020-grammatical
%X Recently, several studies have focused on improving the performance of grammatical error correction (GEC) tasks using pseudo data. However, a large amount of pseudo data are required to train an accurate GEC model. To address the limitations of language and computational resources, we assume that introducing pseudo errors into sentences similar to those written by the language learners is more efficient, rather than incorporating random pseudo errors into monolingual data. In this regard, we study the effect of pseudo data on GEC task performance using two approaches. First, we extract sentences that are similar to the learners’ sentences from monolingual data. Second, we generate realistic pseudo errors by considering error types that learners often make. Based on our comparative results, we observe that F0.5 scores for the Russian GEC task are significantly improved.
%R 10.18653/v1/2020.acl-srw.5
%U https://aclanthology.org/2020.acl-srw.5
%U https://doi.org/10.18653/v1/2020.acl-srw.5
%P 27-32
Markdown (Informal)
[Grammatical Error Correction Using Pseudo Learner Corpus Considering Learner’s Error Tendency](https://aclanthology.org/2020.acl-srw.5) (Takahashi et al., ACL 2020)
ACL
Yujin Takahashi, Satoru Katsumata, and Mamoru Komachi. 2020. Grammatical Error Correction Using Pseudo Learner Corpus Considering Learner’s Error Tendency. In Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics: Student Research Workshop, pages 27–32, Online. Association for Computational Linguistics.
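
The abstract's second approach, generating pseudo errors from the error types that learners often make, can be illustrated with a rough sketch. This is a minimal, hypothetical Python example: the error-type names, their probabilities, and the helper inject_pseudo_error are illustrative assumptions for building (noisy source, clean target) pairs, not the authors' actual implementation.

```python
import random

# Hypothetical error-type distribution, e.g. one estimated from a learner
# corpus (the types and probabilities below are assumptions, not the paper's).
ERROR_TYPE_PROBS = {
    "delete_article": 0.4,   # drop "a" / "an" / "the"
    "swap_adjacent": 0.3,    # transpose two neighbouring words
    "drop_word": 0.3,        # remove a random word
}

ARTICLES = {"a", "an", "the"}


def inject_pseudo_error(tokens, rng=random):
    """Corrupt a clean tokenized sentence with one learner-like pseudo error."""
    tokens = list(tokens)
    error_type = rng.choices(
        list(ERROR_TYPE_PROBS), weights=list(ERROR_TYPE_PROBS.values())
    )[0]
    if error_type == "delete_article":
        idxs = [i for i, t in enumerate(tokens) if t.lower() in ARTICLES]
        if idxs:
            del tokens[rng.choice(idxs)]
    elif error_type == "swap_adjacent" and len(tokens) > 1:
        i = rng.randrange(len(tokens) - 1)
        tokens[i], tokens[i + 1] = tokens[i + 1], tokens[i]
    elif error_type == "drop_word" and tokens:
        del tokens[rng.randrange(len(tokens))]
    return tokens


# Example: build one (noisy source, clean target) pseudo training pair.
clean = "I bought a book at the store".split()
noisy = inject_pseudo_error(clean)
print(" ".join(noisy), "->", " ".join(clean))
```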