% Conference paper: Ceolin (2022), Ninth Workshop on NLP for Similar Languages,
% Varieties and Dialects. The abstract describes a submission to the ITDI shared task.
% Capitalised names in the title are brace-protected as whole words so that
% sentence-casing bibliography styles do not lowercase them.
% NOTE(review): address holds the conference venue as exported, not a publisher city.
@inproceedings{ceolin-2022-neural,
  title     = {Neural Networks for Cross-domain Language Identification. {Phlyers} @{Vardial} 2022},
  author    = {Ceolin, Andrea},
  editor    = {Scherrer, Yves and
               Jauhiainen, Tommi and
               Ljube{\v{s}}i{\'c}, Nikola and
               Nakov, Preslav and
               Tiedemann, J{\"o}rg and
               Zampieri, Marcos},
  booktitle = {Proceedings of the Ninth Workshop on {NLP} for Similar Languages, Varieties and Dialects},
  month     = oct,
  year      = {2022},
  address   = {Gyeongju, Republic of Korea},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2022.vardial-1.11},
  pages     = {99--108},
  abstract  = {We present our contribution to the Identification of Languages and Dialects of Italy shared task (ITDI) proposed in the VarDial Evaluation Campaign 2022, which asked participants to automatically identify the language of a text associated to one of the language varieties of Italy. The method that yielded the best results in our experiments was a Deep Feedforward Neural Network (DNN) trained on character ngram counts, which provided a better performance compared to Naive Bayes methods and Convolutional Neural Networks (CNN). The system was among the best methods proposed for the ITDI shared task. The analysis of the results suggests that simple DNNs could be more efficient than CNNs to perform language identification of close varieties.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for citekey ceolin-2022-neural: one conference paper
     whose host item (relatedItem type="host") is the proceedings volume. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="ceolin-2022-neural">
    <!-- Paper-level metadata -->
    <titleInfo>
      <title>Neural Networks for Cross-domain Language Identification. Phlyers @Vardial 2022</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Andrea</namePart>
      <namePart type="family">Ceolin</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2022-10</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host publication: proceedings title, editors, publisher and place -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Ninth Workshop on NLP for Similar Languages, Varieties and Dialects</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Yves</namePart>
        <namePart type="family">Scherrer</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Tommi</namePart>
        <namePart type="family">Jauhiainen</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Nikola</namePart>
        <namePart type="family">Ljubešić</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Preslav</namePart>
        <namePart type="family">Nakov</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Jörg</namePart>
        <namePart type="family">Tiedemann</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Marcos</namePart>
        <namePart type="family">Zampieri</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Gyeongju, Republic of Korea</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>We present our contribution to the Identification of Languages and Dialects of Italy shared task (ITDI) proposed in the VarDial Evaluation Campaign 2022, which asked participants to automatically identify the language of a text associated to one of the language varieties of Italy. The method that yielded the best results in our experiments was a Deep Feedforward Neural Network (DNN) trained on character ngram counts, which provided a better performance compared to Naive Bayes methods and Convolutional Neural Networks (CNN). The system was among the best methods proposed for the ITDI shared task. The analysis of the results suggests that simple DNNs could be more efficient than CNNs to perform language identification of close varieties.</abstract>
    <identifier type="citekey">ceolin-2022-neural</identifier>
    <location>
      <url>https://aclanthology.org/2022.vardial-1.11</url>
    </location>
    <!-- Issue date and page range of the paper within the host volume -->
    <part>
      <date>2022-10</date>
      <extent unit="page">
        <start>99</start>
        <end>108</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Neural Networks for Cross-domain Language Identification. Phlyers @Vardial 2022
%A Ceolin, Andrea
%Y Scherrer, Yves
%Y Jauhiainen, Tommi
%Y Ljubešić, Nikola
%Y Nakov, Preslav
%Y Tiedemann, Jörg
%Y Zampieri, Marcos
%S Proceedings of the Ninth Workshop on NLP for Similar Languages, Varieties and Dialects
%D 2022
%8 October
%I Association for Computational Linguistics
%C Gyeongju, Republic of Korea
%F ceolin-2022-neural
%X We present our contribution to the Identification of Languages and Dialects of Italy shared task (ITDI) proposed in the VarDial Evaluation Campaign 2022, which asked participants to automatically identify the language of a text associated to one of the language varieties of Italy. The method that yielded the best results in our experiments was a Deep Feedforward Neural Network (DNN) trained on character ngram counts, which provided a better performance compared to Naive Bayes methods and Convolutional Neural Networks (CNN). The system was among the best methods proposed for the ITDI shared task. The analysis of the results suggests that simple DNNs could be more efficient than CNNs to perform language identification of close varieties.
%U https://aclanthology.org/2022.vardial-1.11
%P 99-108
Markdown (Informal)
[Neural Networks for Cross-domain Language Identification. Phlyers @Vardial 2022](https://aclanthology.org/2022.vardial-1.11) (Ceolin, VarDial 2022)
ACL