% Workshop paper from BEA 2021 (ACL Anthology ID 2021.bea-1.15).
% The acronym in booktitle is brace-protected so that bibliography styles
% which sentence-case titles cannot lowercase it.
@inproceedings{katinskaia-yangarber-2021-assessing,
  title     = {Assessing Grammatical Correctness in Language Learning},
  author    = {Katinskaia, Anisia and
               Yangarber, Roman},
  editor    = {Burstein, Jill and
               Horbach, Andrea and
               Kochmar, Ekaterina and
               Laarmann-Quante, Ronja and
               Leacock, Claudia and
               Madnani, Nitin and
               Pil{\'a}n, Ildik{\'o} and
               Yannakoudakis, Helen and
               Zesch, Torsten},
  booktitle = {Proceedings of the 16th Workshop on Innovative Use of {NLP} for Building Educational Applications},
  month     = apr,
  year      = {2021},
  address   = {Online},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2021.bea-1.15},
  pages     = {135--146},
  abstract  = {We present experiments on assessing the grammatical correctness of learners{'} answers in a language-learning System (references to the System, and the links to the released data and code are withheld for anonymity). In particular, we explore the problem of detecting alternative-correct answers: when more than one inflected form of a lemma fits syntactically and semantically in a given context. We approach the problem with the methods for grammatical error detection (GED), since we hypothesize that models for detecting grammatical mistakes can assess the correctness of potential alternative answers in a learning setting. Due to the paucity of training data, we explore the ability of pre-trained BERT to detect grammatical errors and then fine-tune it using synthetic training data. In this work, we focus on errors in inflection. Our experiments show a. that pre-trained BERT performs worse at detecting grammatical irregularities for Russian than for English; b. that fine-tuned BERT yields promising results on assessing the correctness of grammatical exercises; and c. establish a new benchmark for Russian. To further investigate its performance, we compare fine-tuned BERT with one of the state-of-the-art models for GED (Bell et al., 2019) on our dataset and RULEC-GEC (Rozovskaya and Roth, 2019). We release the manually annotated learner dataset, used for testing, for general use.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- Single MODS v3 record; the citekey identifier below matches the mods ID. -->
  <mods ID="katinskaia-yangarber-2021-assessing">
    <titleInfo>
      <title>Assessing Grammatical Correctness in Language Learning</title>
    </titleInfo>
    <!-- Paper authors, in listed order. -->
    <name type="personal">
      <namePart type="given">Anisia</namePart>
      <namePart type="family">Katinskaia</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Roman</namePart>
      <namePart type="family">Yangarber</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2021-04</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, its editors, publisher and place. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 16th Workshop on Innovative Use of NLP for Building Educational Applications</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Jill</namePart>
        <namePart type="family">Burstein</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Andrea</namePart>
        <namePart type="family">Horbach</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ekaterina</namePart>
        <namePart type="family">Kochmar</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ronja</namePart>
        <namePart type="family">Laarmann-Quante</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Claudia</namePart>
        <namePart type="family">Leacock</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Nitin</namePart>
        <namePart type="family">Madnani</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ildikó</namePart>
        <namePart type="family">Pilán</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Helen</namePart>
        <namePart type="family">Yannakoudakis</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Torsten</namePart>
        <namePart type="family">Zesch</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Online</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>We present experiments on assessing the grammatical correctness of learners’ answers in a language-learning System (references to the System, and the links to the released data and code are withheld for anonymity). In particular, we explore the problem of detecting alternative-correct answers: when more than one inflected form of a lemma fits syntactically and semantically in a given context. We approach the problem with the methods for grammatical error detection (GED), since we hypothesize that models for detecting grammatical mistakes can assess the correctness of potential alternative answers in a learning setting. Due to the paucity of training data, we explore the ability of pre-trained BERT to detect grammatical errors and then fine-tune it using synthetic training data. In this work, we focus on errors in inflection. Our experiments show a. that pre-trained BERT performs worse at detecting grammatical irregularities for Russian than for English; b. that fine-tuned BERT yields promising results on assessing the correctness of grammatical exercises; and c. establish a new benchmark for Russian. To further investigate its performance, we compare fine-tuned BERT with one of the state-of-the-art models for GED (Bell et al., 2019) on our dataset and RULEC-GEC (Rozovskaya and Roth, 2019). We release the manually annotated learner dataset, used for testing, for general use.</abstract>
    <identifier type="citekey">katinskaia-yangarber-2021-assessing</identifier>
    <location>
      <url>https://aclanthology.org/2021.bea-1.15</url>
    </location>
    <!-- Position of the paper within the host volume. -->
    <part>
      <date>2021-04</date>
      <extent unit="page">
        <start>135</start>
        <end>146</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Assessing Grammatical Correctness in Language Learning
%A Katinskaia, Anisia
%A Yangarber, Roman
%Y Burstein, Jill
%Y Horbach, Andrea
%Y Kochmar, Ekaterina
%Y Laarmann-Quante, Ronja
%Y Leacock, Claudia
%Y Madnani, Nitin
%Y Pilán, Ildikó
%Y Yannakoudakis, Helen
%Y Zesch, Torsten
%S Proceedings of the 16th Workshop on Innovative Use of NLP for Building Educational Applications
%D 2021
%8 April
%I Association for Computational Linguistics
%C Online
%F katinskaia-yangarber-2021-assessing
%X We present experiments on assessing the grammatical correctness of learners’ answers in a language-learning System (references to the System, and the links to the released data and code are withheld for anonymity). In particular, we explore the problem of detecting alternative-correct answers: when more than one inflected form of a lemma fits syntactically and semantically in a given context. We approach the problem with the methods for grammatical error detection (GED), since we hypothesize that models for detecting grammatical mistakes can assess the correctness of potential alternative answers in a learning setting. Due to the paucity of training data, we explore the ability of pre-trained BERT to detect grammatical errors and then fine-tune it using synthetic training data. In this work, we focus on errors in inflection. Our experiments show a. that pre-trained BERT performs worse at detecting grammatical irregularities for Russian than for English; b. that fine-tuned BERT yields promising results on assessing the correctness of grammatical exercises; and c. establish a new benchmark for Russian. To further investigate its performance, we compare fine-tuned BERT with one of the state-of-the-art models for GED (Bell et al., 2019) on our dataset and RULEC-GEC (Rozovskaya and Roth, 2019). We release the manually annotated learner dataset, used for testing, for general use.
%U https://aclanthology.org/2021.bea-1.15
%P 135-146
Markdown (Informal)
[Assessing Grammatical Correctness in Language Learning](https://aclanthology.org/2021.bea-1.15) (Katinskaia & Yangarber, BEA 2021)
ACL