% Workshop paper, ACL Anthology ID W17-1215, in the VarDial 2017 proceedings.
@inproceedings{criscuolo-aluisio-2017-discriminating,
  author    = {Criscuolo, Marcelo and
               Alu{\'\i}sio, Sandra Maria},
  title     = {Discriminating between Similar Languages with Word-level Convolutional Neural Networks},
  editor    = {Nakov, Preslav and
               Zampieri, Marcos and
               Ljube{\v{s}}i{\'c}, Nikola and
               Tiedemann, J{\"o}rg and
               Malmasi, Shevin and
               Ali, Ahmed},
  booktitle = {Proceedings of the Fourth Workshop on {NLP} for Similar Languages, Varieties and Dialects ({V}ar{D}ial)},
  month     = apr,
  year      = {2017},
  address   = {Valencia, Spain},
  publisher = {Association for Computational Linguistics},
  pages     = {124--130},
  doi       = {10.18653/v1/W17-1215},
  url       = {https://aclanthology.org/W17-1215},
  abstract  = {Discriminating between Similar Languages (DSL) is a challenging task addressed at the VarDial Workshop series. We report on our participation in the DSL shared task with a two-stage system. In the first stage, character n-grams are used to separate language groups, then specialized classifiers distinguish similar language varieties. We have conducted experiments with three system configurations and submitted one run for each. Our main approach is a word-level convolutional neural network (CNN) that learns task-specific vectors with minimal text preprocessing. We also experiment with multi-layer perceptron (MLP) networks and another hybrid configuration. Our best run achieved an accuracy of 90.76{\%}, ranking 8th among 11 participants and getting very close to the system that ranked first (less than 2 points). Even though the CNN model could not achieve the best results, it still makes a viable approach to discriminating between similar languages.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for the paper with citekey criscuolo-aluisio-2017-discriminating. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="criscuolo-aluisio-2017-discriminating">
    <!-- Paper title -->
    <titleInfo>
      <title>Discriminating between Similar Languages with Word-level Convolutional Neural Networks</title>
    </titleInfo>
    <!-- Paper authors, in order -->
    <name type="personal">
      <namePart type="given">Marcelo</namePart>
      <namePart type="family">Criscuolo</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Sandra</namePart>
      <namePart type="given">Maria</namePart>
      <namePart type="family">Aluísio</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2017-04</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors and publisher -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Fourth Workshop on NLP for Similar Languages, Varieties and Dialects (VarDial)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Preslav</namePart>
        <namePart type="family">Nakov</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Marcos</namePart>
        <namePart type="family">Zampieri</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Nikola</namePart>
        <namePart type="family">Ljubešić</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Jörg</namePart>
        <namePart type="family">Tiedemann</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Shevin</namePart>
        <namePart type="family">Malmasi</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ahmed</namePart>
        <namePart type="family">Ali</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Valencia, Spain</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Discriminating between Similar Languages (DSL) is a challenging task addressed at the VarDial Workshop series. We report on our participation in the DSL shared task with a two-stage system. In the first stage, character n-grams are used to separate language groups, then specialized classifiers distinguish similar language varieties. We have conducted experiments with three system configurations and submitted one run for each. Our main approach is a word-level convolutional neural network (CNN) that learns task-specific vectors with minimal text preprocessing. We also experiment with multi-layer perceptron (MLP) networks and another hybrid configuration. Our best run achieved an accuracy of 90.76%, ranking 8th among 11 participants and getting very close to the system that ranked first (less than 2 points). Even though the CNN model could not achieve the best results, it still makes a viable approach to discriminating between similar languages.</abstract>
    <!-- Identifiers and resolvable location -->
    <identifier type="citekey">criscuolo-aluisio-2017-discriminating</identifier>
    <identifier type="doi">10.18653/v1/W17-1215</identifier>
    <location>
      <url>https://aclanthology.org/W17-1215</url>
    </location>
    <!-- Position of the paper within the host volume -->
    <part>
      <date>2017-04</date>
      <extent unit="page">
        <start>124</start>
        <end>130</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Discriminating between Similar Languages with Word-level Convolutional Neural Networks
%A Criscuolo, Marcelo
%A Aluísio, Sandra Maria
%Y Nakov, Preslav
%Y Zampieri, Marcos
%Y Ljubešić, Nikola
%Y Tiedemann, Jörg
%Y Malmasi, Shevin
%Y Ali, Ahmed
%S Proceedings of the Fourth Workshop on NLP for Similar Languages, Varieties and Dialects (VarDial)
%D 2017
%8 April
%I Association for Computational Linguistics
%C Valencia, Spain
%F criscuolo-aluisio-2017-discriminating
%X Discriminating between Similar Languages (DSL) is a challenging task addressed at the VarDial Workshop series. We report on our participation in the DSL shared task with a two-stage system. In the first stage, character n-grams are used to separate language groups, then specialized classifiers distinguish similar language varieties. We have conducted experiments with three system configurations and submitted one run for each. Our main approach is a word-level convolutional neural network (CNN) that learns task-specific vectors with minimal text preprocessing. We also experiment with multi-layer perceptron (MLP) networks and another hybrid configuration. Our best run achieved an accuracy of 90.76%, ranking 8th among 11 participants and getting very close to the system that ranked first (less than 2 points). Even though the CNN model could not achieve the best results, it still makes a viable approach to discriminating between similar languages.
%R 10.18653/v1/W17-1215
%U https://aclanthology.org/W17-1215
%U https://doi.org/10.18653/v1/W17-1215
%P 124-130
Markdown (Informal)
[Discriminating between Similar Languages with Word-level Convolutional Neural Networks](https://aclanthology.org/W17-1215) (Criscuolo & Aluísio, VarDial 2017)
ACL