@inproceedings{pilan-volodina-2018-exploring,
title = "Exploring word embeddings and phonological similarity for the unsupervised correction of language learner errors",
author = "Pil{\'a}n, Ildik{\'o} and
Volodina, Elena",
editor = "Alex, Beatrice and
Degaetano-Ortlieb, Stefania and
Feldman, Anna and
Kazantseva, Anna and
Reiter, Nils and
Szpakowicz, Stan",
booktitle = "Proceedings of the Second Joint {SIGHUM} Workshop on Computational Linguistics for Cultural Heritage, Social Sciences, Humanities and Literature",
month = aug,
year = "2018",
address = "Santa Fe, New Mexico",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W18-4514",
pages = "119--128",
abstract = "The presence of misspellings and other errors or non-standard word forms poses a considerable challenge for NLP systems. Although several supervised approaches have been proposed previously to normalize these, annotated training data is scarce for many languages. We investigate, therefore, an unsupervised method where correction candidates for Swedish language learners{'} errors are retrieved from word embeddings. Furthermore, we compare the usefulness of combining cosine similarity with orthographic and phonological similarity based on a neural grapheme-to-phoneme conversion system we train for this purpose. Although combinations of similarity measures have been explored for finding error correction candidates, it remains unclear how these measures relate to each other and how much they contribute individually to identifying the correct alternative. We experiment with different combinations of these and find that integrating phonological information is especially useful when the majority of learner errors are related to misspellings, but less so when errors are of a variety of types including, e.g. grammatical errors.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="pilan-volodina-2018-exploring">
<titleInfo>
<title>Exploring word embeddings and phonological similarity for the unsupervised correction of language learner errors</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ildikó</namePart>
<namePart type="family">Pilán</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Elena</namePart>
<namePart type="family">Volodina</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2018-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Second Joint SIGHUM Workshop on Computational Linguistics for Cultural Heritage, Social Sciences, Humanities and Literature</title>
</titleInfo>
<name type="personal">
<namePart type="given">Beatrice</namePart>
<namePart type="family">Alex</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stefania</namePart>
<namePart type="family">Degaetano-Ortlieb</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">Feldman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">Kazantseva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nils</namePart>
<namePart type="family">Reiter</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stan</namePart>
<namePart type="family">Szpakowicz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Santa Fe, New Mexico</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The presence of misspellings and other errors or non-standard word forms poses a considerable challenge for NLP systems. Although several supervised approaches have been proposed previously to normalize these, annotated training data is scarce for many languages. We investigate, therefore, an unsupervised method where correction candidates for Swedish language learners’ errors are retrieved from word embeddings. Furthermore, we compare the usefulness of combining cosine similarity with orthographic and phonological similarity based on a neural grapheme-to-phoneme conversion system we train for this purpose. Although combinations of similarity measures have been explored for finding error correction candidates, it remains unclear how these measures relate to each other and how much they contribute individually to identifying the correct alternative. We experiment with different combinations of these and find that integrating phonological information is especially useful when the majority of learner errors are related to misspellings, but less so when errors are of a variety of types including, e.g. grammatical errors.</abstract>
<identifier type="citekey">pilan-volodina-2018-exploring</identifier>
<location>
<url>https://aclanthology.org/W18-4514</url>
</location>
<part>
<date>2018-08</date>
<extent unit="page">
<start>119</start>
<end>128</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Exploring word embeddings and phonological similarity for the unsupervised correction of language learner errors
%A Pilán, Ildikó
%A Volodina, Elena
%Y Alex, Beatrice
%Y Degaetano-Ortlieb, Stefania
%Y Feldman, Anna
%Y Kazantseva, Anna
%Y Reiter, Nils
%Y Szpakowicz, Stan
%S Proceedings of the Second Joint SIGHUM Workshop on Computational Linguistics for Cultural Heritage, Social Sciences, Humanities and Literature
%D 2018
%8 August
%I Association for Computational Linguistics
%C Santa Fe, New Mexico
%F pilan-volodina-2018-exploring
%X The presence of misspellings and other errors or non-standard word forms poses a considerable challenge for NLP systems. Although several supervised approaches have been proposed previously to normalize these, annotated training data is scarce for many languages. We investigate, therefore, an unsupervised method where correction candidates for Swedish language learners’ errors are retrieved from word embeddings. Furthermore, we compare the usefulness of combining cosine similarity with orthographic and phonological similarity based on a neural grapheme-to-phoneme conversion system we train for this purpose. Although combinations of similarity measures have been explored for finding error correction candidates, it remains unclear how these measures relate to each other and how much they contribute individually to identifying the correct alternative. We experiment with different combinations of these and find that integrating phonological information is especially useful when the majority of learner errors are related to misspellings, but less so when errors are of a variety of types including, e.g. grammatical errors.
%U https://aclanthology.org/W18-4514
%P 119-128
Markdown (Informal)
[Exploring word embeddings and phonological similarity for the unsupervised correction of language learner errors](https://aclanthology.org/W18-4514) (Pilán & Volodina, LaTeCH 2018)
ACL