@inproceedings{coltekin-etal-2018-tubingen,
title = {{T}{\"u}bingen-{O}slo Team at the {V}ar{D}ial 2018 Evaluation Campaign: An Analysis of N-gram Features in Language Variety Identification},
author = {{\c{C}}{\"o}ltekin, {\c{C}}a{\u{g}}r{\i} and
Rama, Taraka and
Blaschke, Verena},
editor = {Zampieri, Marcos and
Nakov, Preslav and
Ljube{\v{s}}i{\'c}, Nikola and
Tiedemann, J{\"o}rg and
Malmasi, Shervin and
Ali, Ahmed},
booktitle = "Proceedings of the Fifth Workshop on {NLP} for Similar Languages, Varieties and Dialects ({V}ar{D}ial 2018)",
month = aug,
year = "2018",
address = "Santa Fe, New Mexico, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W18-3906",
pages = "55--65",
abstract = "This paper describes our systems for the VarDial 2018 evaluation campaign. We participated in all language identification tasks, namely, Arabic dialect identification (ADI), German dialect identification (GDI), discriminating between Dutch and Flemish in Subtitles (DFS), and Indo-Aryan Language Identification (ILI). In all of the tasks, we only used textual transcripts (not using audio features for ADI). We submitted system runs based on support vector machine classifiers (SVMs) with bag of character and word n-grams as features, and gated bidirectional recurrent neural networks (RNNs) using units of characters and words. Our SVM models outperformed our RNN models in all tasks, obtaining the first place on the DFS task, third place on the ADI task, and second place on others according to the official rankings. As well as describing the models we used in the shared task participation, we present an analysis of the n-gram features used by the SVM models in each task, and also report additional results (that were run after the official competition deadline) on the GDI surprise dialect track.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="coltekin-etal-2018-tubingen">
<titleInfo>
<title>Tübingen-Oslo Team at the VarDial 2018 Evaluation Campaign: An Analysis of N-gram Features in Language Variety Identification</title>
</titleInfo>
<name type="personal">
<namePart type="given">Çağrı</namePart>
<namePart type="family">Çöltekin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Taraka</namePart>
<namePart type="family">Rama</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Verena</namePart>
<namePart type="family">Blaschke</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2018-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fifth Workshop on NLP for Similar Languages, Varieties and Dialects (VarDial 2018)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Marcos</namePart>
<namePart type="family">Zampieri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Preslav</namePart>
<namePart type="family">Nakov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nikola</namePart>
<namePart type="family">Ljubešić</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jörg</namePart>
<namePart type="family">Tiedemann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shervin</namePart>
<namePart type="family">Malmasi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ahmed</namePart>
<namePart type="family">Ali</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Santa Fe, New Mexico, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper describes our systems for the VarDial 2018 evaluation campaign. We participated in all language identification tasks, namely, Arabic dialect identification (ADI), German dialect identification (GDI), discriminating between Dutch and Flemish in Subtitles (DFS), and Indo-Aryan Language Identification (ILI). In all of the tasks, we only used textual transcripts (not using audio features for ADI). We submitted system runs based on support vector machine classifiers (SVMs) with bag of character and word n-grams as features, and gated bidirectional recurrent neural networks (RNNs) using units of characters and words. Our SVM models outperformed our RNN models in all tasks, obtaining the first place on the DFS task, third place on the ADI task, and second place on others according to the official rankings. As well as describing the models we used in the shared task participation, we present an analysis of the n-gram features used by the SVM models in each task, and also report additional results (that were run after the official competition deadline) on the GDI surprise dialect track.</abstract>
<identifier type="citekey">coltekin-etal-2018-tubingen</identifier>
<location>
<url>https://aclanthology.org/W18-3906</url>
</location>
<part>
<date>2018-08</date>
<extent unit="page">
<start>55</start>
<end>65</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Tübingen-Oslo Team at the VarDial 2018 Evaluation Campaign: An Analysis of N-gram Features in Language Variety Identification
%A Çöltekin, Çağrı
%A Rama, Taraka
%A Blaschke, Verena
%Y Zampieri, Marcos
%Y Nakov, Preslav
%Y Ljubešić, Nikola
%Y Tiedemann, Jörg
%Y Malmasi, Shervin
%Y Ali, Ahmed
%S Proceedings of the Fifth Workshop on NLP for Similar Languages, Varieties and Dialects (VarDial 2018)
%D 2018
%8 August
%I Association for Computational Linguistics
%C Santa Fe, New Mexico, USA
%F coltekin-etal-2018-tubingen
%X This paper describes our systems for the VarDial 2018 evaluation campaign. We participated in all language identification tasks, namely, Arabic dialect identification (ADI), German dialect identification (GDI), discriminating between Dutch and Flemish in Subtitles (DFS), and Indo-Aryan Language Identification (ILI). In all of the tasks, we only used textual transcripts (not using audio features for ADI). We submitted system runs based on support vector machine classifiers (SVMs) with bag of character and word n-grams as features, and gated bidirectional recurrent neural networks (RNNs) using units of characters and words. Our SVM models outperformed our RNN models in all tasks, obtaining the first place on the DFS task, third place on the ADI task, and second place on others according to the official rankings. As well as describing the models we used in the shared task participation, we present an analysis of the n-gram features used by the SVM models in each task, and also report additional results (that were run after the official competition deadline) on the GDI surprise dialect track.
%U https://aclanthology.org/W18-3906
%P 55-65
Markdown (Informal)
[Tübingen-Oslo Team at the VarDial 2018 Evaluation Campaign: An Analysis of N-gram Features in Language Variety Identification](https://aclanthology.org/W18-3906) (Çöltekin et al., VarDial 2018)
ACL