@inproceedings{maamouri-etal-2008-diacritic,
title = "Diacritic Annotation in the {A}rabic Treebank and its Impact on Parser Evaluation",
author = "Maamouri, Mohamed and
Kulick, Seth and
Bies, Ann",
editor = "Calzolari, Nicoletta and
Choukri, Khalid and
Maegaard, Bente and
Mariani, Joseph and
Odijk, Jan and
Piperidis, Stelios and
Tapias, Daniel",
booktitle = "Proceedings of the Sixth International Conference on Language Resources and Evaluation ({LREC}'08)",
month = may,
year = "2008",
address = "Marrakech, Morocco",
publisher = "European Language Resources Association (ELRA)",
url = "http://www.lrec-conf.org/proceedings/lrec2008/pdf/706_paper.pdf",
abstract = "The Arabic Treebank (ATB), released by the Linguistic Data Consortium, contains multiple annotation files for each source file, due in part to the role of diacritic inclusion in the annotation process. The data is made available in both vocalized and unvocalized forms, with and without the diacritic marks, respectively. Much parsing work with the ATB has used the unvocalized form, on the basis that it more closely represents the real-world situation. We point out some problems with this usage of the unvocalized data and explain why the unvocalized form does not in fact represent real-world data. This is due to some aspects of the treebank annotation that to our knowledge have never before been published.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="maamouri-etal-2008-diacritic">
<titleInfo>
<title>Diacritic Annotation in the Arabic Treebank and its Impact on Parser Evaluation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mohamed</namePart>
<namePart type="family">Maamouri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Seth</namePart>
<namePart type="family">Kulick</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ann</namePart>
<namePart type="family">Bies</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2008-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Sixth International Conference on Language Resources and Evaluation (LREC’08)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Tapias</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Marrakech, Morocco</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The Arabic Treebank (ATB), released by the Linguistic Data Consortium, contains multiple annotation files for each source file, due in part to the role of diacritic inclusion in the annotation process. The data is made available in both vocalized and unvocalized forms, with and without the diacritic marks, respectively. Much parsing work with the ATB has used the unvocalized form, on the basis that it more closely represents the real-world situation. We point out some problems with this usage of the unvocalized data and explain why the unvocalized form does not in fact represent real-world data. This is due to some aspects of the treebank annotation that to our knowledge have never before been published.</abstract>
<identifier type="citekey">maamouri-etal-2008-diacritic</identifier>
<location>
<url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/706_paper.pdf</url>
</location>
<part>
<date>2008-05</date>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Diacritic Annotation in the Arabic Treebank and its Impact on Parser Evaluation
%A Maamouri, Mohamed
%A Kulick, Seth
%A Bies, Ann
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Odijk, Jan
%Y Piperidis, Stelios
%Y Tapias, Daniel
%S Proceedings of the Sixth International Conference on Language Resources and Evaluation (LREC’08)
%D 2008
%8 May
%I European Language Resources Association (ELRA)
%C Marrakech, Morocco
%F maamouri-etal-2008-diacritic
%X The Arabic Treebank (ATB), released by the Linguistic Data Consortium, contains multiple annotation files for each source file, due in part to the role of diacritic inclusion in the annotation process. The data is made available in both vocalized and unvocalized forms, with and without the diacritic marks, respectively. Much parsing work with the ATB has used the unvocalized form, on the basis that it more closely represents the real-world situation. We point out some problems with this usage of the unvocalized data and explain why the unvocalized form does not in fact represent real-world data. This is due to some aspects of the treebank annotation that to our knowledge have never before been published.
%U http://www.lrec-conf.org/proceedings/lrec2008/pdf/706_paper.pdf
Markdown (Informal)
[Diacritic Annotation in the Arabic Treebank and its Impact on Parser Evaluation](http://www.lrec-conf.org/proceedings/lrec2008/pdf/706_paper.pdf) (Maamouri et al., LREC 2008)
ACL