% Conference paper by Fabian Barteld (EACL 2017 Student Research Workshop).
% The abstract is kept verbatim as exported; `address` holds the place given
% in the exported record (Valencia, Spain).
@inproceedings{barteld-2017-detecting,
  title     = {Detecting spelling variants in non-standard texts},
  author    = {Barteld, Fabian},
  editor    = {Kunneman, Florian and
               I{\~n}urrieta, Uxoa and
               Camilleri, John J. and
               Ardanuy, Mariona Coll},
  booktitle = {Proceedings of the Student Research Workshop at the 15th Conference of the {European} Chapter of the Association for Computational Linguistics},
  month     = apr,
  year      = {2017},
  address   = {Valencia, Spain},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/E17-4002},
  pages     = {11--22},
  abstract  = {Spelling variation in non-standard language, e.g. computer-mediated communication and historical texts, is usually treated as a deviation from a standard spelling, e.g. 2mr as an non-standard spelling for tomorrow. Consequently, in normalization {--} the standard approach of dealing with spelling variation {--} so-called non-standard words are mapped to their corresponding standard words. However, there is not always a corresponding standard word. This can be the case for single types (like emoticons in computer-mediated communication) or a complete language, e.g. texts from historical languages that did not develop to a standard variety. The approach presented in this thesis proposal deals with spelling variation in absence of reference to a standard. The task is to detect pairs of types that are variants of the same morphological word. An approach for spelling-variant detection is presented, where pairs of potential spelling variants are generated with Levenshtein distance and subsequently filtered by supervised machine learning. The approach is evaluated on historical Low German texts. Finally, further perspectives are discussed.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS record for the conference paper with citekey barteld-2017-detecting. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="barteld-2017-detecting">
    <!-- The paper itself: title, author, issue date. -->
    <titleInfo>
      <title>Detecting spelling variants in non-standard texts</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Fabian</namePart>
      <namePart type="family">Barteld</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2017-04</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, its editors, publisher and place. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Student Research Workshop at the 15th Conference of the European Chapter of the Association for Computational Linguistics</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Florian</namePart>
        <namePart type="family">Kunneman</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Uxoa</namePart>
        <namePart type="family">Iñurrieta</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">John</namePart>
        <namePart type="given">J</namePart>
        <namePart type="family">Camilleri</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Mariona</namePart>
        <namePart type="given">Coll</namePart>
        <namePart type="family">Ardanuy</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Valencia, Spain</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <!-- Abstract text is kept on a single line so its character content is unchanged. -->
    <abstract>Spelling variation in non-standard language, e.g. computer-mediated communication and historical texts, is usually treated as a deviation from a standard spelling, e.g. 2mr as an non-standard spelling for tomorrow. Consequently, in normalization – the standard approach of dealing with spelling variation – so-called non-standard words are mapped to their corresponding standard words. However, there is not always a corresponding standard word. This can be the case for single types (like emoticons in computer-mediated communication) or a complete language, e.g. texts from historical languages that did not develop to a standard variety. The approach presented in this thesis proposal deals with spelling variation in absence of reference to a standard. The task is to detect pairs of types that are variants of the same morphological word. An approach for spelling-variant detection is presented, where pairs of potential spelling variants are generated with Levenshtein distance and subsequently filtered by supervised machine learning. The approach is evaluated on historical Low German texts. Finally, further perspectives are discussed.</abstract>
    <!-- Identifiers and location of the paper. -->
    <identifier type="citekey">barteld-2017-detecting</identifier>
    <location>
      <url>https://aclanthology.org/E17-4002</url>
    </location>
    <!-- Position of the paper within the host volume. -->
    <part>
      <date>2017-04</date>
      <extent unit="page">
        <start>11</start>
        <end>22</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Detecting spelling variants in non-standard texts
%A Barteld, Fabian
%Y Kunneman, Florian
%Y Iñurrieta, Uxoa
%Y Camilleri, John J.
%Y Ardanuy, Mariona Coll
%S Proceedings of the Student Research Workshop at the 15th Conference of the European Chapter of the Association for Computational Linguistics
%D 2017
%8 April
%I Association for Computational Linguistics
%C Valencia, Spain
%F barteld-2017-detecting
%X Spelling variation in non-standard language, e.g. computer-mediated communication and historical texts, is usually treated as a deviation from a standard spelling, e.g. 2mr as an non-standard spelling for tomorrow. Consequently, in normalization – the standard approach of dealing with spelling variation – so-called non-standard words are mapped to their corresponding standard words. However, there is not always a corresponding standard word. This can be the case for single types (like emoticons in computer-mediated communication) or a complete language, e.g. texts from historical languages that did not develop to a standard variety. The approach presented in this thesis proposal deals with spelling variation in absence of reference to a standard. The task is to detect pairs of types that are variants of the same morphological word. An approach for spelling-variant detection is presented, where pairs of potential spelling variants are generated with Levenshtein distance and subsequently filtered by supervised machine learning. The approach is evaluated on historical Low German texts. Finally, further perspectives are discussed.
%U https://aclanthology.org/E17-4002
%P 11-22
Markdown (Informal)
[Detecting spelling variants in non-standard texts](https://aclanthology.org/E17-4002) (Barteld, EACL 2017)
ACL
- Fabian Barteld. 2017. Detecting spelling variants in non-standard texts. In Proceedings of the Student Research Workshop at the 15th Conference of the European Chapter of the Association for Computational Linguistics, pages 11–22, Valencia, Spain. Association for Computational Linguistics.