@inproceedings{dukes-habash-2010-morphological,
title = "Morphological Annotation of {Q}uranic {A}rabic",
author = "Dukes, Kais and
Habash, Nizar",
editor = "Calzolari, Nicoletta and
Choukri, Khalid and
Maegaard, Bente and
Mariani, Joseph and
Odijk, Jan and
Piperidis, Stelios and
Rosner, Mike and
Tapias, Daniel",
booktitle = "Proceedings of the Seventh International Conference on Language Resources and Evaluation ({LREC}'10)",
month = may,
year = "2010",
address = "Valletta, Malta",
publisher = "European Language Resources Association (ELRA)",
url = "http://www.lrec-conf.org/proceedings/lrec2010/pdf/276_Paper.pdf",
abstract = "The Quranic Arabic Corpus (\url{http://corpus.quran.com}) is an annotated linguistic resource with multiple layers of annotation including morphological segmentation, part-of-speech tagging, and syntactic analysis using dependency grammar. The motivation behind this work is to produce a resource that enables further analysis of the Quran, the 1,400 year old central religious text of Islam. This paper describes a new approach to morphological annotation of Quranic Arabic, a genre difficult to compare with other forms of Arabic. Processing Quranic Arabic is a unique challenge from a computational point of view, since the vocabulary and spelling differ from Modern Standard Arabic. The Quranic Arabic Corpus differs from other Arabic computational resources in adopting a tagset that closely follows traditional Arabic grammar. We made this decision in order to leverage a large body of existing historical grammatical analysis, and to encourage online collaborative annotation. In this paper, we discuss how the unique challenge of morphological annotation of Quranic Arabic is solved using a multi-stage approach. The different stages include automatic morphological tagging using diacritic edit-distance, two-pass manual verification, and online collaborative annotation. This process is evaluated to validate the appropriateness of the chosen methodology.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="dukes-habash-2010-morphological">
<titleInfo>
<title>Morphological Annotation of Quranic Arabic</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kais</namePart>
<namePart type="family">Dukes</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nizar</namePart>
<namePart type="family">Habash</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2010-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Seventh International Conference on Language Resources and Evaluation (LREC’10)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mike</namePart>
<namePart type="family">Rosner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Tapias</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Valletta, Malta</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The Quranic Arabic Corpus (http://corpus.quran.com) is an annotated linguistic resource with multiple layers of annotation including morphological segmentation, part-of-speech tagging, and syntactic analysis using dependency grammar. The motivation behind this work is to produce a resource that enables further analysis of the Quran, the 1,400 year old central religious text of Islam. This paper describes a new approach to morphological annotation of Quranic Arabic, a genre difficult to compare with other forms of Arabic. Processing Quranic Arabic is a unique challenge from a computational point of view, since the vocabulary and spelling differ from Modern Standard Arabic. The Quranic Arabic Corpus differs from other Arabic computational resources in adopting a tagset that closely follows traditional Arabic grammar. We made this decision in order to leverage a large body of existing historical grammatical analysis, and to encourage online collaborative annotation. In this paper, we discuss how the unique challenge of morphological annotation of Quranic Arabic is solved using a multi-stage approach. The different stages include automatic morphological tagging using diacritic edit-distance, two-pass manual verification, and online collaborative annotation. This process is evaluated to validate the appropriateness of the chosen methodology.</abstract>
<identifier type="citekey">dukes-habash-2010-morphological</identifier>
<location>
<url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/276_Paper.pdf</url>
</location>
<part>
<date>2010-05</date>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Morphological Annotation of Quranic Arabic
%A Dukes, Kais
%A Habash, Nizar
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Odijk, Jan
%Y Piperidis, Stelios
%Y Rosner, Mike
%Y Tapias, Daniel
%S Proceedings of the Seventh International Conference on Language Resources and Evaluation (LREC’10)
%D 2010
%8 May
%I European Language Resources Association (ELRA)
%C Valletta, Malta
%F dukes-habash-2010-morphological
%X The Quranic Arabic Corpus (http://corpus.quran.com) is an annotated linguistic resource with multiple layers of annotation including morphological segmentation, part-of-speech tagging, and syntactic analysis using dependency grammar. The motivation behind this work is to produce a resource that enables further analysis of the Quran, the 1,400 year old central religious text of Islam. This paper describes a new approach to morphological annotation of Quranic Arabic, a genre difficult to compare with other forms of Arabic. Processing Quranic Arabic is a unique challenge from a computational point of view, since the vocabulary and spelling differ from Modern Standard Arabic. The Quranic Arabic Corpus differs from other Arabic computational resources in adopting a tagset that closely follows traditional Arabic grammar. We made this decision in order to leverage a large body of existing historical grammatical analysis, and to encourage online collaborative annotation. In this paper, we discuss how the unique challenge of morphological annotation of Quranic Arabic is solved using a multi-stage approach. The different stages include automatic morphological tagging using diacritic edit-distance, two-pass manual verification, and online collaborative annotation. This process is evaluated to validate the appropriateness of the chosen methodology.
%U http://www.lrec-conf.org/proceedings/lrec2010/pdf/276_Paper.pdf
Markdown (Informal)
[Morphological Annotation of Quranic Arabic](http://www.lrec-conf.org/proceedings/lrec2010/pdf/276_Paper.pdf) (Dukes & Habash, LREC 2010)
ACL
- Kais Dukes and Nizar Habash. 2010. Morphological Annotation of Quranic Arabic. In Proceedings of the Seventh International Conference on Language Resources and Evaluation (LREC'10), Valletta, Malta. European Language Resources Association (ELRA).