@inproceedings{piktus-etal-2019-misspelling,
title = "Misspelling Oblivious Word Embeddings",
author = "Piktus, Aleksandra and
Edizel, Necati Bora and
Bojanowski, Piotr and
Grave, Edouard and
Ferreira, Rui and
Silvestri, Fabrizio",
editor = "Burstein, Jill and
Doran, Christy and
Solorio, Thamar",
booktitle = "Proceedings of the 2019 Conference of the North {A}merican Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers)",
month = jun,
year = "2019",
address = "Minneapolis, Minnesota",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/N19-1326",
doi = "10.18653/v1/N19-1326",
pages = "3226--3234",
abstract = "In this paper we present a method to learn word embeddings that are resilient to misspellings. Existing word embeddings have limited applicability to malformed texts, which contain a non-negligible amount of out-of-vocabulary words. We propose a method combining FastText with subwords and a supervised task of learning misspelling patterns. In our method, misspellings of each word are embedded close to their correct variants. We train these embeddings on a new dataset we are releasing publicly. Finally, we experimentally show the advantages of this approach on both intrinsic and extrinsic NLP tasks using public test sets.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="piktus-etal-2019-misspelling">
<titleInfo>
<title>Misspelling Oblivious Word Embeddings</title>
</titleInfo>
<name type="personal">
<namePart type="given">Aleksandra</namePart>
<namePart type="family">Piktus</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Necati</namePart>
<namePart type="given">Bora</namePart>
<namePart type="family">Edizel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Piotr</namePart>
<namePart type="family">Bojanowski</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Edouard</namePart>
<namePart type="family">Grave</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rui</namePart>
<namePart type="family">Ferreira</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fabrizio</namePart>
<namePart type="family">Silvestri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jill</namePart>
<namePart type="family">Burstein</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christy</namePart>
<namePart type="family">Doran</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thamar</namePart>
<namePart type="family">Solorio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Minneapolis, Minnesota</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this paper we present a method to learn word embeddings that are resilient to misspellings. Existing word embeddings have limited applicability to malformed texts, which contain a non-negligible amount of out-of-vocabulary words. We propose a method combining FastText with subwords and a supervised task of learning misspelling patterns. In our method, misspellings of each word are embedded close to their correct variants. We train these embeddings on a new dataset we are releasing publicly. Finally, we experimentally show the advantages of this approach on both intrinsic and extrinsic NLP tasks using public test sets.</abstract>
<identifier type="citekey">piktus-etal-2019-misspelling</identifier>
<identifier type="doi">10.18653/v1/N19-1326</identifier>
<location>
<url>https://aclanthology.org/N19-1326</url>
</location>
<part>
<date>2019-06</date>
<extent unit="page">
<start>3226</start>
<end>3234</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Misspelling Oblivious Word Embeddings
%A Piktus, Aleksandra
%A Edizel, Necati Bora
%A Bojanowski, Piotr
%A Grave, Edouard
%A Ferreira, Rui
%A Silvestri, Fabrizio
%Y Burstein, Jill
%Y Doran, Christy
%Y Solorio, Thamar
%S Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers)
%D 2019
%8 June
%I Association for Computational Linguistics
%C Minneapolis, Minnesota
%F piktus-etal-2019-misspelling
%X In this paper we present a method to learn word embeddings that are resilient to misspellings. Existing word embeddings have limited applicability to malformed texts, which contain a non-negligible amount of out-of-vocabulary words. We propose a method combining FastText with subwords and a supervised task of learning misspelling patterns. In our method, misspellings of each word are embedded close to their correct variants. We train these embeddings on a new dataset we are releasing publicly. Finally, we experimentally show the advantages of this approach on both intrinsic and extrinsic NLP tasks using public test sets.
%R 10.18653/v1/N19-1326
%U https://aclanthology.org/N19-1326
%U https://doi.org/10.18653/v1/N19-1326
%P 3226-3234
Markdown (Informal)
[Misspelling Oblivious Word Embeddings](https://aclanthology.org/N19-1326) (Piktus et al., NAACL 2019)
ACL
- Aleksandra Piktus, Necati Bora Edizel, Piotr Bojanowski, Edouard Grave, Rui Ferreira, and Fabrizio Silvestri. 2019. Misspelling Oblivious Word Embeddings. In Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers), pages 3226–3234, Minneapolis, Minnesota. Association for Computational Linguistics.