@inproceedings{caines-etal-2020-grammatical,
title = "Grammatical error detection in transcriptions of spoken {E}nglish",
author = "Caines, Andrew and
Bentz, Christian and
Knill, Kate and
Rei, Marek and
Buttery, Paula",
editor = "Scott, Donia and
Bel, Nuria and
Zong, Chengqing",
booktitle = "Proceedings of the 28th International Conference on Computational Linguistics",
month = dec,
year = "2020",
address = "Barcelona, Spain (Online)",
publisher = "International Committee on Computational Linguistics",
url = "https://aclanthology.org/2020.coling-main.195",
doi = "10.18653/v1/2020.coling-main.195",
pages = "2144--2162",
abstract = "We describe the collection of transcription corrections and grammatical error annotations for the CrowdED Corpus of spoken English monologues on business topics. The corpus recordings were crowdsourced from native speakers of English and learners of English with German as their first language. The new transcriptions and annotations are obtained from different crowdworkers: we analyse the 1108 new crowdworker submissions and propose that they can be used for automatic transcription post-editing and grammatical error correction for speech. To further explore the data we train grammatical error detection models with various configurations including pre-trained and contextual word representations as input, additional features and auxiliary objectives, and extra training data from written error-annotated corpora. We find that a model concatenating pre-trained and contextual word representations as input performs best, and that additional information does not lead to further performance gains.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="caines-etal-2020-grammatical">
<titleInfo>
<title>Grammatical error detection in transcriptions of spoken English</title>
</titleInfo>
<name type="personal">
<namePart type="given">Andrew</namePart>
<namePart type="family">Caines</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christian</namePart>
<namePart type="family">Bentz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kate</namePart>
<namePart type="family">Knill</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marek</namePart>
<namePart type="family">Rei</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Paula</namePart>
<namePart type="family">Buttery</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 28th International Conference on Computational Linguistics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Donia</namePart>
<namePart type="family">Scott</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nuria</namePart>
<namePart type="family">Bel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chengqing</namePart>
<namePart type="family">Zong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>International Committee on Computational Linguistics</publisher>
<place>
<placeTerm type="text">Barcelona, Spain (Online)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We describe the collection of transcription corrections and grammatical error annotations for the CrowdED Corpus of spoken English monologues on business topics. The corpus recordings were crowdsourced from native speakers of English and learners of English with German as their first language. The new transcriptions and annotations are obtained from different crowdworkers: we analyse the 1108 new crowdworker submissions and propose that they can be used for automatic transcription post-editing and grammatical error correction for speech. To further explore the data we train grammatical error detection models with various configurations including pre-trained and contextual word representations as input, additional features and auxiliary objectives, and extra training data from written error-annotated corpora. We find that a model concatenating pre-trained and contextual word representations as input performs best, and that additional information does not lead to further performance gains.</abstract>
<identifier type="citekey">caines-etal-2020-grammatical</identifier>
<identifier type="doi">10.18653/v1/2020.coling-main.195</identifier>
<location>
<url>https://aclanthology.org/2020.coling-main.195</url>
</location>
<part>
<date>2020-12</date>
<extent unit="page">
<start>2144</start>
<end>2162</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Grammatical error detection in transcriptions of spoken English
%A Caines, Andrew
%A Bentz, Christian
%A Knill, Kate
%A Rei, Marek
%A Buttery, Paula
%Y Scott, Donia
%Y Bel, Nuria
%Y Zong, Chengqing
%S Proceedings of the 28th International Conference on Computational Linguistics
%D 2020
%8 December
%I International Committee on Computational Linguistics
%C Barcelona, Spain (Online)
%F caines-etal-2020-grammatical
%X We describe the collection of transcription corrections and grammatical error annotations for the CrowdED Corpus of spoken English monologues on business topics. The corpus recordings were crowdsourced from native speakers of English and learners of English with German as their first language. The new transcriptions and annotations are obtained from different crowdworkers: we analyse the 1108 new crowdworker submissions and propose that they can be used for automatic transcription post-editing and grammatical error correction for speech. To further explore the data we train grammatical error detection models with various configurations including pre-trained and contextual word representations as input, additional features and auxiliary objectives, and extra training data from written error-annotated corpora. We find that a model concatenating pre-trained and contextual word representations as input performs best, and that additional information does not lead to further performance gains.
%R 10.18653/v1/2020.coling-main.195
%U https://aclanthology.org/2020.coling-main.195
%U https://doi.org/10.18653/v1/2020.coling-main.195
%P 2144-2162
Markdown (Informal)
[Grammatical error detection in transcriptions of spoken English](https://aclanthology.org/2020.coling-main.195) (Caines et al., COLING 2020)
ACL
- Andrew Caines, Christian Bentz, Kate Knill, Marek Rei, and Paula Buttery. 2020. Grammatical error detection in transcriptions of spoken English. In Proceedings of the 28th International Conference on Computational Linguistics, pages 2144–2162, Barcelona, Spain (Online). International Committee on Computational Linguistics.