@inproceedings{rama-coltekin-2016-lstm,
title = "{LSTM} Autoencoders for Dialect Analysis",
author = {Rama, Taraka and
{\c{C}}{\"o}ltekin, {\c{C}}a{\u{g}}r{\i}},
editor = {Nakov, Preslav and
Zampieri, Marcos and
Tan, Liling and
Ljube\v si\'c, Nikola and
Tiedemann, J\"org and
Malmasi, Shervin},
booktitle = "Proceedings of the Third Workshop on {NLP} for Similar Languages, Varieties and Dialects ({V}ar{D}ial3)",
month = dec,
year = "2016",
address = "Osaka, Japan",
publisher = "The COLING 2016 Organizing Committee",
url = "https://aclanthology.org/W16-4803/",
pages = "25--32",
abstract = "Computational approaches for dialectometry employed Levenshtein distance to compute an aggregate similarity between two dialects belonging to a single language group. In this paper, we apply a sequence-to-sequence autoencoder to learn a deep representation for words that can be used for meaningful comparison across dialects. In contrast to the alignment-based methods, our method does not require explicit alignments. We apply our architectures to three different datasets and show that the learned representations indicate highly similar results with the analyses based on Levenshtein distance and capture the traditional dialectal differences shown by dialectologists."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="rama-coltekin-2016-lstm">
<titleInfo>
<title>LSTM Autoencoders for Dialect Analysis</title>
</titleInfo>
<name type="personal">
<namePart type="given">Taraka</namePart>
<namePart type="family">Rama</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Çağrı</namePart>
<namePart type="family">Çöltekin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2016-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Third Workshop on NLP for Similar Languages, Varieties and Dialects (VarDial3)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Preslav</namePart>
<namePart type="family">Nakov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marcos</namePart>
<namePart type="family">Zampieri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Liling</namePart>
<namePart type="family">Tan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nikola</namePart>
<namePart type="family">Ljubešić</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jörg</namePart>
<namePart type="family">Tiedemann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shervin</namePart>
<namePart type="family">Malmasi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>The COLING 2016 Organizing Committee</publisher>
<place>
<placeTerm type="text">Osaka, Japan</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Computational approaches for dialectometry employed Levenshtein distance to compute an aggregate similarity between two dialects belonging to a single language group. In this paper, we apply a sequence-to-sequence autoencoder to learn a deep representation for words that can be used for meaningful comparison across dialects. In contrast to the alignment-based methods, our method does not require explicit alignments. We apply our architectures to three different datasets and show that the learned representations indicate highly similar results with the analyses based on Levenshtein distance and capture the traditional dialectal differences shown by dialectologists.</abstract>
<identifier type="citekey">rama-coltekin-2016-lstm</identifier>
<location>
<url>https://aclanthology.org/W16-4803/</url>
</location>
<part>
<date>2016-12</date>
<extent unit="page">
<start>25</start>
<end>32</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T LSTM Autoencoders for Dialect Analysis
%A Rama, Taraka
%A Çöltekin, Çağrı
%Y Nakov, Preslav
%Y Zampieri, Marcos
%Y Tan, Liling
%Y Ljubešić, Nikola
%Y Tiedemann, Jörg
%Y Malmasi, Shervin
%S Proceedings of the Third Workshop on NLP for Similar Languages, Varieties and Dialects (VarDial3)
%D 2016
%8 December
%I The COLING 2016 Organizing Committee
%C Osaka, Japan
%F rama-coltekin-2016-lstm
%X Computational approaches for dialectometry employed Levenshtein distance to compute an aggregate similarity between two dialects belonging to a single language group. In this paper, we apply a sequence-to-sequence autoencoder to learn a deep representation for words that can be used for meaningful comparison across dialects. In contrast to the alignment-based methods, our method does not require explicit alignments. We apply our architectures to three different datasets and show that the learned representations indicate highly similar results with the analyses based on Levenshtein distance and capture the traditional dialectal differences shown by dialectologists.
%U https://aclanthology.org/W16-4803/
%P 25-32
Markdown (Informal)
[LSTM Autoencoders for Dialect Analysis](https://aclanthology.org/W16-4803/) (Rama & Çöltekin, VarDial 2016)
ACL
- Taraka Rama and Çağrı Çöltekin. 2016. LSTM Autoencoders for Dialect Analysis. In Proceedings of the Third Workshop on NLP for Similar Languages, Varieties and Dialects (VarDial3), pages 25–32, Osaka, Japan. The COLING 2016 Organizing Committee.