% Workshop paper entry. "Arabic" is brace-protected as a whole word (not just
% its first letter) so sentence-casing styles keep the capital without
% splitting the word for kerning or hyphenation.
@inproceedings{elgabou-kazakov-2017-building,
  title     = {Building Dialectal {Arabic} Corpora},
  author    = {Elgabou, Hani and
               Kazakov, Dimitar},
  editor    = {Temnikova, Irina and
               Orasan, Constantin and
               Pastor, Gloria Corpas and
               Vogel, Stephan},
  booktitle = {Proceedings of the Workshop Human-Informed Translation and Interpreting Technology},
  month     = sep,
  year      = {2017},
  address   = {Varna, Bulgaria},
  publisher = {Association for Computational Linguistics, Shoumen, Bulgaria},
  url       = {https://doi.org/10.26615/978-954-452-042-7_007},
  doi       = {10.26615/978-954-452-042-7_007},
  pages     = {52--57},
  abstract  = {The aim of this research is to identify local Arabic dialects in texts from social media (Twitter) and link them to specific geographic areas. Dialect identification is studied as a subset of the task of language identification. The proposed method is based on unsupervised learning using simultaneously lexical and geographic distance. While this study focusses on Libyan dialects, the approach is general, and could produce resources to support human translators and interpreters when dealing with vernaculars rather than standard Arabic.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 collection holding a single record for the paper. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="elgabou-kazakov-2017-building">
    <titleInfo>
      <title>Building Dialectal Arabic Corpora</title>
    </titleInfo>
    <!-- Authors of the paper, in listed order. -->
    <name type="personal">
      <namePart type="given">Hani</namePart>
      <namePart type="family">Elgabou</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Dimitar</namePart>
      <namePart type="family">Kazakov</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2017-09</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors and publication details. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Workshop Human-Informed Translation and Interpreting Technology</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Irina</namePart>
        <namePart type="family">Temnikova</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Constantin</namePart>
        <namePart type="family">Orasan</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Gloria</namePart>
        <namePart type="given">Corpas</namePart>
        <namePart type="family">Pastor</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Stephan</namePart>
        <namePart type="family">Vogel</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics, Shoumen, Bulgaria</publisher>
        <place>
          <placeTerm type="text">Varna, Bulgaria</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>The aim of this research is to identify local Arabic dialects in texts from social media (Twitter) and link them to specific geographic areas. Dialect identification is studied as a subset of the task of language identification. The proposed method is based on unsupervised learning using simultaneously lexical and geographic distance. While this study focusses on Libyan dialects, the approach is general, and could produce resources to support human translators and interpreters when dealing with vernaculars rather than standard Arabic.</abstract>
    <!-- Identifiers: the citation key and the DOI. -->
    <identifier type="citekey">elgabou-kazakov-2017-building</identifier>
    <identifier type="doi">10.26615/978-954-452-042-7_007</identifier>
    <!-- Location of the paper within the host volume. -->
    <part>
      <date>2017-09</date>
      <extent unit="page">
        <start>52</start>
        <end>57</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Building Dialectal Arabic Corpora
%A Elgabou, Hani
%A Kazakov, Dimitar
%Y Temnikova, Irina
%Y Orasan, Constantin
%Y Pastor, Gloria Corpas
%Y Vogel, Stephan
%S Proceedings of the Workshop Human-Informed Translation and Interpreting Technology
%D 2017
%8 September
%I Association for Computational Linguistics, Shoumen, Bulgaria
%C Varna, Bulgaria
%F elgabou-kazakov-2017-building
%X The aim of this research is to identify local Arabic dialects in texts from social media (Twitter) and link them to specific geographic areas. Dialect identification is studied as a subset of the task of language identification. The proposed method is based on unsupervised learning using simultaneously lexical and geographic distance. While this study focusses on Libyan dialects, the approach is general, and could produce resources to support human translators and interpreters when dealing with vernaculars rather than standard Arabic.
%R 10.26615/978-954-452-042-7_007
%U https://doi.org/10.26615/978-954-452-042-7_007
%P 52-57
Markdown (Informal)
[Building Dialectal Arabic Corpora](https://doi.org/10.26615/978-954-452-042-7_007) (Elgabou & Kazakov, RANLP 2017)
ACL
- Hani Elgabou and Dimitar Kazakov. 2017. Building Dialectal Arabic Corpora. In Proceedings of the Workshop Human-Informed Translation and Interpreting Technology, pages 52–57, Varna, Bulgaria. Association for Computational Linguistics, Shoumen, Bulgaria.