@inproceedings{nisioi-etal-2016-vanilla,
title = "Vanilla Classifiers for Distinguishing between Similar Languages",
author = "Nisioi, Sergiu and
Ciobanu, Alina Maria and
Dinu, Liviu P.",
editor = {Nakov, Preslav and
Zampieri, Marcos and
Tan, Liling and
Ljube{\v{s}}i{\'c}, Nikola and
Tiedemann, J{\"o}rg and
Malmasi, Shervin},
booktitle = "Proceedings of the Third Workshop on {NLP} for Similar Languages, Varieties and Dialects ({V}ar{D}ial3)",
month = dec,
year = "2016",
address = "Osaka, Japan",
publisher = "The COLING 2016 Organizing Committee",
url = "https://aclanthology.org/W16-4830/",
pages = "235--242",
abstract = "In this paper we describe the submission of the UniBuc-NLP team for the Discriminating between Similar Languages Shared Task, DSL 2016. We present and analyze the results we obtained in the closed track of sub-task 1 (Similar languages and language varieties) and sub-task 2 (Arabic dialects). For sub-task 1 we used a logistic regression classifier with tf-idf feature weighting and for sub-task 2 a character-based string kernel with an SVM classifier. Our results show that good accuracy scores can be obtained with limited feature and model engineering. While certain limitations are to be acknowledged, our approach worked surprisingly well for out-of-domain, social media data, with 0.898 accuracy (3rd place) for dataset B1 and 0.838 accuracy (4th place) for dataset B2."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="nisioi-etal-2016-vanilla">
<titleInfo>
<title>Vanilla Classifiers for Distinguishing between Similar Languages</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sergiu</namePart>
<namePart type="family">Nisioi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alina</namePart>
<namePart type="given">Maria</namePart>
<namePart type="family">Ciobanu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Liviu</namePart>
<namePart type="given">P</namePart>
<namePart type="family">Dinu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2016-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Third Workshop on NLP for Similar Languages, Varieties and Dialects (VarDial3)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Preslav</namePart>
<namePart type="family">Nakov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marcos</namePart>
<namePart type="family">Zampieri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Liling</namePart>
<namePart type="family">Tan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nikola</namePart>
<namePart type="family">Ljubešić</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jörg</namePart>
<namePart type="family">Tiedemann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shervin</namePart>
<namePart type="family">Malmasi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>The COLING 2016 Organizing Committee</publisher>
<place>
<placeTerm type="text">Osaka, Japan</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this paper we describe the submission of the UniBuc-NLP team for the Discriminating between Similar Languages Shared Task, DSL 2016. We present and analyze the results we obtained in the closed track of sub-task 1 (Similar languages and language varieties) and sub-task 2 (Arabic dialects). For sub-task 1 we used a logistic regression classifier with tf-idf feature weighting and for sub-task 2 a character-based string kernel with an SVM classifier. Our results show that good accuracy scores can be obtained with limited feature and model engineering. While certain limitations are to be acknowledged, our approach worked surprisingly well for out-of-domain, social media data, with 0.898 accuracy (3rd place) for dataset B1 and 0.838 accuracy (4th place) for dataset B2.</abstract>
<identifier type="citekey">nisioi-etal-2016-vanilla</identifier>
<location>
<url>https://aclanthology.org/W16-4830/</url>
</location>
<part>
<date>2016-12</date>
<extent unit="page">
<start>235</start>
<end>242</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Vanilla Classifiers for Distinguishing between Similar Languages
%A Nisioi, Sergiu
%A Ciobanu, Alina Maria
%A Dinu, Liviu P.
%Y Nakov, Preslav
%Y Zampieri, Marcos
%Y Tan, Liling
%Y Ljubešić, Nikola
%Y Tiedemann, Jörg
%Y Malmasi, Shervin
%S Proceedings of the Third Workshop on NLP for Similar Languages, Varieties and Dialects (VarDial3)
%D 2016
%8 December
%I The COLING 2016 Organizing Committee
%C Osaka, Japan
%F nisioi-etal-2016-vanilla
%X In this paper we describe the submission of the UniBuc-NLP team for the Discriminating between Similar Languages Shared Task, DSL 2016. We present and analyze the results we obtained in the closed track of sub-task 1 (Similar languages and language varieties) and sub-task 2 (Arabic dialects). For sub-task 1 we used a logistic regression classifier with tf-idf feature weighting and for sub-task 2 a character-based string kernel with an SVM classifier. Our results show that good accuracy scores can be obtained with limited feature and model engineering. While certain limitations are to be acknowledged, our approach worked surprisingly well for out-of-domain, social media data, with 0.898 accuracy (3rd place) for dataset B1 and 0.838 accuracy (4th place) for dataset B2.
%U https://aclanthology.org/W16-4830/
%P 235-242
Markdown (Informal)
[Vanilla Classifiers for Distinguishing between Similar Languages](https://aclanthology.org/W16-4830/) (Nisioi et al., VarDial 2016)
ACL