@inproceedings{zaharia-etal-2020-exploring,
title = "Exploring the Power of {R}omanian {BERT} for Dialect Identification",
author = "Zaharia, George-Eduard and
Avram, Andrei-Marius and
Cercel, Dumitru-Clementin and
Rebedea, Traian",
editor = {Zampieri, Marcos and
Nakov, Preslav and
Ljube{\v{s}}i{\'c}, Nikola and
Tiedemann, J{\"o}rg and
Scherrer, Yves},
booktitle = "Proceedings of the 7th Workshop on NLP for Similar Languages, Varieties and Dialects",
month = dec,
year = "2020",
address = "Barcelona, Spain (Online)",
publisher = "International Committee on Computational Linguistics (ICCL)",
url = "https://aclanthology.org/2020.vardial-1.22/",
pages = "232--241",
abstract = "Dialect identification represents a key aspect for improving a series of tasks, for example, opinion mining, considering that the location of the speaker can greatly influence the attitude towards a subject. In this work, we describe the systems developed by our team for VarDial 2020: Romanian Dialect Identification, a task specifically created for challenging participants to solve the previously mentioned issue. More specifically, we introduce a series of neural systems based on Transformers, that combine a BERT model exclusively pre-trained on the Romanian language with techniques such as adversarial training or character-level embeddings. By using these approaches, we were able to obtain a 0.6475 macro F1 score on the test dataset, thus allowing us to be ranked 5th out of 8 participant teams."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zaharia-etal-2020-exploring">
<titleInfo>
<title>Exploring the Power of Romanian BERT for Dialect Identification</title>
</titleInfo>
<name type="personal">
<namePart type="given">George-Eduard</namePart>
<namePart type="family">Zaharia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andrei-Marius</namePart>
<namePart type="family">Avram</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dumitru-Clementin</namePart>
<namePart type="family">Cercel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Traian</namePart>
<namePart type="family">Rebedea</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 7th Workshop on NLP for Similar Languages, Varieties and Dialects</title>
</titleInfo>
<name type="personal">
<namePart type="given">Marcos</namePart>
<namePart type="family">Zampieri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Preslav</namePart>
<namePart type="family">Nakov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nikola</namePart>
<namePart type="family">Ljubešić</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jörg</namePart>
<namePart type="family">Tiedemann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yves</namePart>
<namePart type="family">Scherrer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>International Committee on Computational Linguistics (ICCL)</publisher>
<place>
<placeTerm type="text">Barcelona, Spain (Online)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Dialect identification represents a key aspect for improving a series of tasks, for example, opinion mining, considering that the location of the speaker can greatly influence the attitude towards a subject. In this work, we describe the systems developed by our team for VarDial 2020: Romanian Dialect Identification, a task specifically created for challenging participants to solve the previously mentioned issue. More specifically, we introduce a series of neural systems based on Transformers, that combine a BERT model exclusively pre-trained on the Romanian language with techniques such as adversarial training or character-level embeddings. By using these approaches, we were able to obtain a 0.6475 macro F1 score on the test dataset, thus allowing us to be ranked 5th out of 8 participant teams.</abstract>
<identifier type="citekey">zaharia-etal-2020-exploring</identifier>
<location>
<url>https://aclanthology.org/2020.vardial-1.22/</url>
</location>
<part>
<date>2020-12</date>
<extent unit="page">
<start>232</start>
<end>241</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Exploring the Power of Romanian BERT for Dialect Identification
%A Zaharia, George-Eduard
%A Avram, Andrei-Marius
%A Cercel, Dumitru-Clementin
%A Rebedea, Traian
%Y Zampieri, Marcos
%Y Nakov, Preslav
%Y Ljubešić, Nikola
%Y Tiedemann, Jörg
%Y Scherrer, Yves
%S Proceedings of the 7th Workshop on NLP for Similar Languages, Varieties and Dialects
%D 2020
%8 December
%I International Committee on Computational Linguistics (ICCL)
%C Barcelona, Spain (Online)
%F zaharia-etal-2020-exploring
%X Dialect identification represents a key aspect for improving a series of tasks, for example, opinion mining, considering that the location of the speaker can greatly influence the attitude towards a subject. In this work, we describe the systems developed by our team for VarDial 2020: Romanian Dialect Identification, a task specifically created for challenging participants to solve the previously mentioned issue. More specifically, we introduce a series of neural systems based on Transformers, that combine a BERT model exclusively pre-trained on the Romanian language with techniques such as adversarial training or character-level embeddings. By using these approaches, we were able to obtain a 0.6475 macro F1 score on the test dataset, thus allowing us to be ranked 5th out of 8 participant teams.
%U https://aclanthology.org/2020.vardial-1.22/
%P 232-241
Markdown (Informal)
[Exploring the Power of Romanian BERT for Dialect Identification](https://aclanthology.org/2020.vardial-1.22/) (Zaharia et al., VarDial 2020)
ACL
- George-Eduard Zaharia, Andrei-Marius Avram, Dumitru-Clementin Cercel, and Traian Rebedea. 2020. Exploring the Power of Romanian BERT for Dialect Identification. In Proceedings of the 7th Workshop on NLP for Similar Languages, Varieties and Dialects, pages 232–241, Barcelona, Spain (Online). International Committee on Computational Linguistics (ICCL).