% ACL Anthology record W19-4624 (WANLP 2019 workshop paper).
@inproceedings{eltanbouly-etal-2019-simple,
    title = {Simple But Not Na{\"i}ve: Fine-Grained {Arabic} Dialect Identification Using Only N-Grams},
    author = "Eltanbouly, Sohaila  and
      Bashendy, May  and
      Elsayed, Tamer",
    editor = "El-Hajj, Wassim  and
      Belguith, Lamia Hadrich  and
      Bougares, Fethi  and
      Magdy, Walid  and
      Zitouni, Imed  and
      Tomeh, Nadi  and
      El-Haj, Mahmoud  and
      Zaghouani, Wajdi",
    booktitle = "Proceedings of the Fourth Arabic Natural Language Processing Workshop",
    month = aug,
    year = "2019",
    address = "Florence, Italy",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/W19-4624/",
    doi = "10.18653/v1/W19-4624",
    pages = "214--218",
    abstract = {This paper presents the participation of Qatar University team in MADAR shared task, which addresses the problem of sentence-level fine-grained Arabic Dialect Identification over 25 different Arabic dialects in addition to the Modern Standard Arabic. Arabic Dialect Identification is not a trivial task since different dialects share some features, e.g., utilizing the same character set and some vocabularies. We opted to adopt a very simple approach in terms of extracted features and classification models; we only utilize word and character n-grams as features, and Na{\"i}ve Bayes models as classifiers. Surprisingly, the simple approach achieved non-na{\"i}ve performance. The official results, reported on a held-out testing set, show that the dialect of a given sentence can be identified at an accuracy of 64.58{\%} by our best submitted run.}
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="eltanbouly-etal-2019-simple">
    <titleInfo>
        <title>Simple But Not Naïve: Fine-Grained Arabic Dialect Identification Using Only N-Grams</title>
    </titleInfo>
    <name type="personal">
        <namePart type="given">Sohaila</namePart>
        <namePart type="family">Eltanbouly</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">May</namePart>
        <namePart type="family">Bashendy</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Tamer</namePart>
        <namePart type="family">Elsayed</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <originInfo>
        <dateIssued>2019-08</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
        <titleInfo>
            <title>Proceedings of the Fourth Arabic Natural Language Processing Workshop</title>
        </titleInfo>
        <name type="personal">
            <namePart type="given">Wassim</namePart>
            <namePart type="family">El-Hajj</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Lamia</namePart>
            <namePart type="given">Hadrich</namePart>
            <namePart type="family">Belguith</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Fethi</namePart>
            <namePart type="family">Bougares</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Walid</namePart>
            <namePart type="family">Magdy</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Imed</namePart>
            <namePart type="family">Zitouni</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Nadi</namePart>
            <namePart type="family">Tomeh</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Mahmoud</namePart>
            <namePart type="family">El-Haj</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Wajdi</namePart>
            <namePart type="family">Zaghouani</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <originInfo>
            <publisher>Association for Computational Linguistics</publisher>
            <place>
                <placeTerm type="text">Florence, Italy</placeTerm>
            </place>
        </originInfo>
        <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>This paper presents the participation of Qatar University team in MADAR shared task, which addresses the problem of sentence-level fine-grained Arabic Dialect Identification over 25 different Arabic dialects in addition to the Modern Standard Arabic. Arabic Dialect Identification is not a trivial task since different dialects share some features, e.g., utilizing the same character set and some vocabularies. We opted to adopt a very simple approach in terms of extracted features and classification models; we only utilize word and character n-grams as features, and Naïve Bayes models as classifiers. Surprisingly, the simple approach achieved non-naïve performance. The official results, reported on a held-out testing set, show that the dialect of a given sentence can be identified at an accuracy of 64.58% by our best submitted run.</abstract>
    <identifier type="citekey">eltanbouly-etal-2019-simple</identifier>
    <identifier type="doi">10.18653/v1/W19-4624</identifier>
    <location>
        <url>https://aclanthology.org/W19-4624/</url>
    </location>
    <part>
        <date>2019-08</date>
        <extent unit="page">
            <start>214</start>
            <end>218</end>
        </extent>
    </part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Simple But Not Naïve: Fine-Grained Arabic Dialect Identification Using Only N-Grams
%A Eltanbouly, Sohaila
%A Bashendy, May
%A Elsayed, Tamer
%Y El-Hajj, Wassim
%Y Belguith, Lamia Hadrich
%Y Bougares, Fethi
%Y Magdy, Walid
%Y Zitouni, Imed
%Y Tomeh, Nadi
%Y El-Haj, Mahmoud
%Y Zaghouani, Wajdi
%S Proceedings of the Fourth Arabic Natural Language Processing Workshop
%D 2019
%8 August
%I Association for Computational Linguistics
%C Florence, Italy
%F eltanbouly-etal-2019-simple
%X This paper presents the participation of Qatar University team in MADAR shared task, which addresses the problem of sentence-level fine-grained Arabic Dialect Identification over 25 different Arabic dialects in addition to the Modern Standard Arabic. Arabic Dialect Identification is not a trivial task since different dialects share some features, e.g., utilizing the same character set and some vocabularies. We opted to adopt a very simple approach in terms of extracted features and classification models; we only utilize word and character n-grams as features, and Naïve Bayes models as classifiers. Surprisingly, the simple approach achieved non-naïve performance. The official results, reported on a held-out testing set, show that the dialect of a given sentence can be identified at an accuracy of 64.58% by our best submitted run.
%R 10.18653/v1/W19-4624
%U https://aclanthology.org/W19-4624/
%U https://doi.org/10.18653/v1/W19-4624
%P 214-218
Markdown (Informal)
[Simple But Not Naïve: Fine-Grained Arabic Dialect Identification Using Only N-Grams](https://aclanthology.org/W19-4624/) (Eltanbouly et al., WANLP 2019)
ACL