@inproceedings{maamouri-etal-2006-diacritization,
title = "Diacritization: A Challenge to {A}rabic Treebank Annotation and Parsing",
author = "Maamouri, Mohamed and
Kulick, Seth and
Bies, Ann",
booktitle = "Proceedings of the International Conference on the Challenge of Arabic for NLP/MT",
month = oct # " 23",
year = "2006",
address = "London, UK",
url = "https://aclanthology.org/2006.bcs-1.4/",
pages = "35--47",
abstract = "Arabic diacritization (referred to sometimes as vocalization or vowelling), defined as the full or partial representation of short vowels, shadda (consonantal length or gemination), tanween (nunation or definiteness), and hamza (the glottal stop and its support letters), is still largely understudied in the current NLP literature. In this paper, the lack of diacritics in standard Arabic texts is presented as a major challenge to most Arabic natural language processing tasks, including parsing. Recent studies (Messaoudi, et al. 2004; Vergyri {\&} Kirchhoff 2004; Zitouni, et al. 2006 and Maamouri, et al. forthcoming) about the place and impact of diacritization in text-based NLP research are presented along with an analysis of the weight of the missing diacritics on Treebank morphological and syntactic analyses and the impact on parser development."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="maamouri-etal-2006-diacritization">
<titleInfo>
<title>Diacritization: A Challenge to Arabic Treebank Annotation and Parsing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mohamed</namePart>
<namePart type="family">Maamouri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Seth</namePart>
<namePart type="family">Kulick</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ann</namePart>
<namePart type="family">Bies</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2006-oct 23</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the International Conference on the Challenge of Arabic for NLP/MT</title>
</titleInfo>
<originInfo>
<place>
<placeTerm type="text">London, UK</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Arabic diacritization (referred to sometimes as vocalization or vowelling), defined as the full or partial representation of short vowels, shadda (consonantal length or gemination), tanween (nunation or definiteness), and hamza (the glottal stop and its support letters), is still largely understudied in the current NLP literature. In this paper, the lack of diacritics in standard Arabic texts is presented as a major challenge to most Arabic natural language processing tasks, including parsing. Recent studies (Messaoudi, et al. 2004; Vergyri &amp; Kirchhoff 2004; Zitouni, et al. 2006 and Maamouri, et al. forthcoming) about the place and impact of diacritization in text-based NLP research are presented along with an analysis of the weight of the missing diacritics on Treebank morphological and syntactic analyses and the impact on parser development.</abstract>
<identifier type="citekey">maamouri-etal-2006-diacritization</identifier>
<location>
<url>https://aclanthology.org/2006.bcs-1.4/</url>
</location>
<part>
<date>2006-oct 23</date>
<extent unit="page">
<start>35</start>
<end>47</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Diacritization: A Challenge to Arabic Treebank Annotation and Parsing
%A Maamouri, Mohamed
%A Kulick, Seth
%A Bies, Ann
%S Proceedings of the International Conference on the Challenge of Arabic for NLP/MT
%D 2006
%8 oct 23
%C London, UK
%F maamouri-etal-2006-diacritization
%X Arabic diacritization (referred to sometimes as vocalization or vowelling), defined as the full or partial representation of short vowels, shadda (consonantal length or gemination), tanween (nunation or definiteness), and hamza (the glottal stop and its support letters), is still largely understudied in the current NLP literature. In this paper, the lack of diacritics in standard Arabic texts is presented as a major challenge to most Arabic natural language processing tasks, including parsing. Recent studies (Messaoudi, et al. 2004; Vergyri & Kirchhoff 2004; Zitouni, et al. 2006 and Maamouri, et al. forthcoming) about the place and impact of diacritization in text-based NLP research are presented along with an analysis of the weight of the missing diacritics on Treebank morphological and syntactic analyses and the impact on parser development.
%U https://aclanthology.org/2006.bcs-1.4/
%P 35-47
Markdown (Informal)
[Diacritization: A Challenge to Arabic Treebank Annotation and Parsing](https://aclanthology.org/2006.bcs-1.4/) (Maamouri et al., BCS 2006)
ACL